letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
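Since wheels are plain zip archives, a diff like this can be reproduced locally with only the Python standard library. A minimal sketch (the wheel filenames below are assumed to match the versions in the title and to sit in the working directory):

import difflib
import zipfile

OLD = "letta_nightly-0.11.6.dev20250902104140-py3-none-any.whl"
NEW = "letta_nightly-0.11.7.dev20250904045700-py3-none-any.whl"

def read_member(whl: str, member: str) -> list[str]:
    # Wheels are zip files, so any member can be read directly.
    with zipfile.ZipFile(whl) as zf:
        return zf.read(member).decode("utf-8").splitlines(keepends=True)

old_src = read_member(OLD, "letta/agents/letta_agent.py")
new_src = read_member(NEW, "letta/agents/letta_agent.py")
print("".join(difflib.unified_diff(old_src, new_src, fromfile="old", tofile="new")))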
Files changed (138)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
@@ -40,7 +40,7 @@ from letta.schemas.letta_message_content import OmittedReasoningContent, Reasoni
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
  from letta.schemas.llm_config import LLMConfig
- from letta.schemas.message import Message, MessageCreate
+ from letta.schemas.message import Message, MessageCreateBase
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
  from letta.schemas.provider_trace import ProviderTraceCreate
  from letta.schemas.step import StepProgression
@@ -48,7 +48,7 @@ from letta.schemas.step_metrics import StepMetrics
  from letta.schemas.tool_execution_result import ToolExecutionResult
  from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
- from letta.server.rest_api.utils import create_letta_messages_from_llm_response
+ from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
  from letta.services.agent_manager import AgentManager
  from letta.services.block_manager import BlockManager
  from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
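The two import changes above track the same feature: the step entrypoints now accept approval responses alongside regular user messages, and approval requests get their own message constructor. A hedged sketch of the type widening this implies; only MessageCreateBase and the approve/reason fields are visible in this diff, so the class shapes and the ApprovalCreate name are assumptions:

from pydantic import BaseModel

class MessageCreateBase(BaseModel):
    """Common base for payloads accepted by step(), _step(), and step_stream()."""

class MessageCreate(MessageCreateBase):
    role: str = "user"
    content: str = ""

class ApprovalCreate(MessageCreateBase):  # hypothetical name, not in this diff
    approve: bool              # True resumes the paused tool call, False denies it
    reason: str | None = None  # optional reason attached to a denial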
@@ -164,7 +164,7 @@ class LettaAgent(BaseAgent):
  @trace_method
  async def step(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  run_id: str | None = None,
  use_assistant_message: bool = True,
@@ -203,7 +203,7 @@ class LettaAgent(BaseAgent):
  @trace_method
  async def step_stream_no_tokens(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: int | None = None,
@@ -218,6 +218,7 @@ class LettaAgent(BaseAgent):
  input_messages, agent_state, self.message_manager, self.actor
  )
  initial_messages = new_in_context_messages
+ in_context_messages = current_in_context_messages
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
  provider_type=agent_state.llm_config.model_endpoint_type,
@@ -233,137 +234,34 @@ class LettaAgent(BaseAgent):
  request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

  for i in range(max_steps):
- # Check for job cancellation at the start of each step
- if await self._check_run_cancellation():
- stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
- logger.info(f"Agent execution cancelled for run {self.current_run_id}")
- yield f"data: {stop_reason.model_dump_json()}\n\n"
- break
-
- step_id = generate_step_id()
- step_start = get_utc_timestamp_ns()
- agent_step_span = tracer.start_span("agent_step", start_time=step_start)
- agent_step_span.set_attributes({"step_id": step_id})
-
- step_progression = StepProgression.START
- should_continue = False
- step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
-
- # Create step early with PENDING status
- logged_step = await self.step_manager.log_step_async(
- actor=self.actor,
- agent_id=agent_state.id,
- provider_name=agent_state.llm_config.model_endpoint_type,
- provider_category=agent_state.llm_config.provider_category or "base",
- model=agent_state.llm_config.model,
- model_endpoint=agent_state.llm_config.model_endpoint,
- context_window_limit=agent_state.llm_config.context_window,
- usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
- provider_id=None,
- job_id=self.current_run_id if self.current_run_id else None,
- step_id=step_id,
- project_id=agent_state.project_id,
- status=StepStatus.PENDING,
- )
- # Only use step_id in messages if step was actually created
- effective_step_id = step_id if logged_step else None
-
- try:
- request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm(
- current_in_context_messages,
- new_in_context_messages,
- agent_state,
- llm_client,
- tool_rules_solver,
- agent_step_span,
- step_metrics,
- )
- )
- in_context_messages = current_in_context_messages + new_in_context_messages
-
- step_progression = StepProgression.RESPONSE_RECEIVED
- log_event("agent.stream_no_tokens.llm_response.received") # [3^]
-
- response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
-
- # update usage
- usage.step_count += 1
- usage.completion_tokens += response.usage.completion_tokens
- usage.prompt_tokens += response.usage.prompt_tokens
- usage.total_tokens += response.usage.total_tokens
- MetricRegistry().message_output_tokens.record(
- response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
-
- if not response.choices[0].message.tool_calls:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
- raise ValueError("No tool calls found in response, model must make a tool call")
- tool_call = response.choices[0].message.tool_calls[0]
- if response.choices[0].message.reasoning_content:
- reasoning = [
- ReasoningContent(
- reasoning=response.choices[0].message.reasoning_content,
- is_native=True,
- signature=response.choices[0].message.reasoning_content_signature,
- )
- ]
- elif response.choices[0].message.omitted_reasoning_content:
- reasoning = [OmittedReasoningContent()]
- elif response.choices[0].message.content:
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
- else:
- self.logger.info("No reasoning content found.")
- reasoning = None
-
+ if in_context_messages[-1].role == "approval":
+ approval_request_message = in_context_messages[-1]
+ step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
  persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
+ approval_request_message.tool_calls[0],
+ [], # TODO: update this
  agent_state,
  tool_rules_solver,
- response.usage,
- reasoning_content=reasoning,
- step_id=effective_step_id,
+ usage,
+ reasoning_content=approval_request_message.content,
+ step_id=approval_request_message.step_id,
  initial_messages=initial_messages,
- agent_step_span=agent_step_span,
  is_final_step=(i == max_steps - 1),
  step_metrics=step_metrics,
+ run_id=self.current_run_id,
+ is_approval=input_messages[0].approve,
+ is_denial=input_messages[0].approve == False,
+ denial_reason=input_messages[0].reason,
  )
- step_progression = StepProgression.STEP_LOGGED
-
- # Update step with actual usage now that we have it (if step was created)
- if logged_step:
- await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
-
- # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
  new_message_idx = len(initial_messages) if initial_messages else 0
  self.response_messages.extend(persisted_messages[new_message_idx:])
  new_in_context_messages.extend(persisted_messages[new_message_idx:])
  initial_messages = None
- log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
-
- # log step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
-
- # Log LLM Trace
- if settings.track_provider_trace:
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json=response_data,
- step_id=step_id, # Use original step_id for telemetry
- organization_id=self.actor.organization_id,
- ),
- )
- step_progression = StepProgression.LOGGED_TRACE
+ in_context_messages = current_in_context_messages + new_in_context_messages

  # stream step
  # TODO: improve TTFT
- filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+ filter_user_messages = [m for m in persisted_messages if m.role != "user" and m.role != "approval"]
  letta_messages = Message.to_letta_messages_from_list(
  filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
  )
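The hunk above makes the non-token streaming loop approval-aware: when the last in-context message is a pending approval request, the agent skips the LLM call entirely and replays the stored tool call through _handle_ai_response with the caller's verdict. A self-contained toy of that gate (names and signatures are illustrative, not the real ones):

from dataclasses import dataclass, field

@dataclass
class Msg:
    role: str
    tool_calls: list = field(default_factory=list)

@dataclass
class Verdict:
    approve: bool
    reason: str | None = None

def step_once(verdicts: list[Verdict], context: list[Msg]) -> str:
    last = context[-1]
    if last.role == "approval":
        # Resume the paused tool call with the caller's decision instead of
        # issuing a new LLM request; the real code reuses the stored step_id.
        v = verdicts[0]
        if v.approve:
            return f"executing stored tool call: {last.tool_calls[0]}"
        return f"tool call denied: {v.reason or 'no reason given'}"
    return "normal path: build request and call the LLM"

print(step_once([Verdict(approve=True)], [Msg("approval", ["send_email"])]))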
@@ -371,104 +269,262 @@ class LettaAgent(BaseAgent):
  for message in letta_messages:
  if include_return_message_types is None or message.message_type in include_return_message_types:
  yield f"data: {message.model_dump_json()}\n\n"
+ else:
+ # Check for job cancellation at the start of each step
+ if await self._check_run_cancellation():
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+ logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+ yield f"data: {stop_reason.model_dump_json()}\n\n"
+ break
+
+ step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})
+
+ step_progression = StepProgression.START
+ should_continue = False
+ step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
+
+ # Create step early with PENDING status
+ logged_step = await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=self.current_run_id if self.current_run_id else None,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ status=StepStatus.PENDING,
+ )
+ # Only use step_id in messages if step was actually created
+ effective_step_id = step_id if logged_step else None
+
+ try:
+ (
+ request_data,
+ response_data,
+ current_in_context_messages,
+ new_in_context_messages,
+ valid_tool_names,
+ ) = await self._build_and_request_from_llm(
+ current_in_context_messages,
+ new_in_context_messages,
+ agent_state,
+ llm_client,
+ tool_rules_solver,
+ agent_step_span,
+ step_metrics,
+ )
+ in_context_messages = current_in_context_messages + new_in_context_messages

- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
- step_progression = StepProgression.FINISHED
+ step_progression = StepProgression.RESPONSE_RECEIVED
+ log_event("agent.stream_no_tokens.llm_response.received") # [3^]

- # Record step metrics for successful completion
- if logged_step and step_metrics:
- # Set the step_ns that was already calculated
- step_metrics.step_ns = step_ns
- await self._record_step_metrics(
- step_id=step_id,
- agent_state=agent_state,
- step_metrics=step_metrics,
+ try:
+ response = llm_client.convert_response_to_chat_completion(
+ response_data, in_context_messages, agent_state.llm_config
+ )
+ except ValueError as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+ raise e
+
+ # update usage
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
  )

- except Exception as e:
- # Handle any unexpected errors during step processing
- self.logger.error(f"Error during step processing: {e}")
- job_update_metadata = {"error": str(e)}
-
- # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
- if not stop_reason:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
- self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
- elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
- raise ValueError(f"Invalid Stop Reason: {stop_reason}")
-
- # Send error stop reason to client and re-raise
- yield f"data: {stop_reason.model_dump_json()}\n\n", 500
- raise
-
- # Update step if it needs to be updated
- finally:
- if step_progression == StepProgression.FINISHED and should_continue:
- continue
-
- self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
- self.logger.info("Running final update. Step Progression: %s", step_progression)
- try:
- if step_progression == StepProgression.FINISHED and not should_continue:
- # Successfully completed - update with final usage and stop reason
- if stop_reason is None:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
- # Note: step already updated with success status after _handle_ai_response
- if logged_step:
- await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
- break
-
- # Handle error cases
- if step_progression < StepProgression.STEP_LOGGED:
- # Error occurred before step was fully logged
- import traceback
-
- if logged_step:
- await self.step_manager.update_step_error_async(
- actor=self.actor,
- step_id=step_id, # Use original step_id for telemetry
- error_type=type(e).__name__ if "e" in locals() else "Unknown",
- error_message=str(e) if "e" in locals() else "Unknown error",
- error_traceback=traceback.format_exc(),
- stop_reason=stop_reason,
+ if not response.choices[0].message.tool_calls:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
  )
-
- if step_progression <= StepProgression.RESPONSE_RECEIVED:
- # TODO (cliandy): persist response if we get it back
- if settings.track_errored_messages and initial_messages:
- for message in initial_messages:
- message.is_err = True
- message.step_id = effective_step_id
- await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
- elif step_progression <= StepProgression.LOGGED_TRACE:
- if stop_reason is None:
- self.logger.error("Error in step after logging step")
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- if logged_step:
- await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ ]
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
+ elif response.choices[0].message.content:
+ reasoning = [
+ TextContent(text=response.choices[0].message.content)
+ ] # reasoning placed into content for legacy reasons
  else:
- self.logger.error("Invalid StepProgression value")
+ self.logger.info("No reasoning content found.")
+ reasoning = None

- if settings.track_stop_reason:
- await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
+ step_id=effective_step_id,
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ step_metrics=step_metrics,
+ )
+ step_progression = StepProgression.STEP_LOGGED
+
+ # Update step with actual usage now that we have it (if step was created)
+ if logged_step:
+ await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
+
+ # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ initial_messages = None
+ log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+
+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # Log LLM Trace
+ if settings.track_provider_trace:
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id, # Use original step_id for telemetry
+ organization_id=self.actor.organization_id,
+ ),
+ )
+ step_progression = StepProgression.LOGGED_TRACE
+
+ # stream step
+ # TODO: improve TTFT
+ filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+ letta_messages = Message.to_letta_messages_from_list(
+ filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
+ )
+ letta_messages = [m for m in letta_messages if m.message_type != "approval_response_message"]
+
+ for message in letta_messages:
+ if include_return_message_types is None or message.message_type in include_return_message_types:
+ yield f"data: {message.model_dump_json()}\n\n"
+
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED

- # Record partial step metrics on failure (capture whatever timing data we have)
- if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
- # Calculate total step time up to the failure point
- step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ # Record step metrics for successful completion
+ if logged_step and step_metrics:
+ # Set the step_ns that was already calculated
+ step_metrics.step_ns = step_ns
  await self._record_step_metrics(
  step_id=step_id,
  agent_state=agent_state,
  step_metrics=step_metrics,
- job_id=locals().get("run_id", self.current_run_id),
  )

  except Exception as e:
- self.logger.error("Failed to update step: %s", e)
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+ job_update_metadata = {"error": str(e)}
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (
+ StopReasonType.no_tool_call,
+ StopReasonType.invalid_tool_call,
+ StopReasonType.invalid_llm_response,
+ ):
+ self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
+
+ # Send error stop reason to client and re-raise
+ yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+ raise

- if not should_continue:
- break
+ # Update step if it needs to be updated
+ finally:
+ if step_progression == StepProgression.FINISHED and should_continue:
+ continue
+
+ self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression == StepProgression.FINISHED and not should_continue:
+ # Successfully completed - update with final usage and stop reason
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ # Note: step already updated with success status after _handle_ai_response
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ break
+
+ # Handle error cases
+ if step_progression < StepProgression.STEP_LOGGED:
+ # Error occurred before step was fully logged
+ import traceback
+
+ if logged_step:
+ await self.step_manager.update_step_error_async(
+ actor=self.actor,
+ step_id=step_id, # Use original step_id for telemetry
+ error_type=type(e).__name__ if "e" in locals() else "Unknown",
+ error_message=str(e) if "e" in locals() else "Unknown error",
+ error_traceback=traceback.format_exc(),
+ stop_reason=stop_reason,
+ )
+
+ if step_progression <= StepProgression.RESPONSE_RECEIVED:
+ # TODO (cliandy): persist response if we get it back
+ if settings.track_errored_messages and initial_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = effective_step_id
+ await self.message_manager.create_many_messages_async(
+ initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ )
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+
+ if settings.track_stop_reason:
+ await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+
+ # Record partial step metrics on failure (capture whatever timing data we have)
+ if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+ # Calculate total step time up to the failure point
+ step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ await self._record_step_metrics(
+ step_id=step_id,
+ agent_state=agent_state,
+ step_metrics=step_metrics,
+ job_id=locals().get("run_id", self.current_run_id),
+ )
+
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)
+
+ if not should_continue:
+ break

  # Extend the in context message ids
  if not agent_state.message_buffer_autoclear:
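The relocated error handling above leans on StepProgression as an ordered checkpoint marker: the finally block compares how far the step got against named stages to decide how much cleanup is owed. A toy sketch of that pattern; the member names and the ordered (<, <=) comparisons appear in this diff, but the exact numbering is an assumption (the streaming variant also has a STREAM_RECEIVED stage):

from enum import IntEnum

class StepProgression(IntEnum):
    START = 0
    RESPONSE_RECEIVED = 1
    STEP_LOGGED = 2
    LOGGED_TRACE = 3
    FINISHED = 4

def cleanup_action(progression: StepProgression) -> str:
    # Mirrors the ordering checks in the finally block above.
    if progression < StepProgression.STEP_LOGGED:
        return "mark step errored; optionally persist the errored input messages"
    if progression <= StepProgression.LOGGED_TRACE:
        return "record an error stop reason on the already-logged step"
    return "step finished; only final stop-reason bookkeeping remains"

print(cleanup_action(StepProgression.RESPONSE_RECEIVED))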
@@ -489,7 +545,7 @@ class LettaAgent(BaseAgent):
  async def _step(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  run_id: str | None = None,
  request_start_timestamp_ns: int | None = None,
@@ -506,6 +562,7 @@ class LettaAgent(BaseAgent):
  input_messages, agent_state, self.message_manager, self.actor
  )
  initial_messages = new_in_context_messages
+ in_context_messages = current_in_context_messages
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
  provider_type=agent_state.llm_config.model_endpoint_type,
@@ -521,53 +578,83 @@ class LettaAgent(BaseAgent):
  job_update_metadata = None
  usage = LettaUsageStatistics()
  for i in range(max_steps):
- # If dry run, build request data and return it without making LLM call
- if dry_run:
- request_data, valid_tool_names = await self._create_llm_request_data_async(
- llm_client=llm_client,
- in_context_messages=current_in_context_messages + new_in_context_messages,
- agent_state=agent_state,
- tool_rules_solver=tool_rules_solver,
+ if in_context_messages[-1].role == "approval":
+ approval_request_message = in_context_messages[-1]
+ step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ approval_request_message.tool_calls[0],
+ [], # TODO: update this
+ agent_state,
+ tool_rules_solver,
+ usage,
+ reasoning_content=approval_request_message.content,
+ step_id=approval_request_message.step_id,
+ initial_messages=initial_messages,
+ is_final_step=(i == max_steps - 1),
+ step_metrics=step_metrics,
+ run_id=run_id or self.current_run_id,
+ is_approval=input_messages[0].approve,
+ is_denial=input_messages[0].approve == False,
+ denial_reason=input_messages[0].reason,
  )
- return request_data
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ initial_messages = None
+ in_context_messages = current_in_context_messages + new_in_context_messages
+ else:
+ # If dry run, build request data and return it without making LLM call
+ if dry_run:
+ request_data, valid_tool_names = await self._create_llm_request_data_async(
+ llm_client=llm_client,
+ in_context_messages=current_in_context_messages + new_in_context_messages,
+ agent_state=agent_state,
+ tool_rules_solver=tool_rules_solver,
+ )
+ return request_data

- # Check for job cancellation at the start of each step
- if await self._check_run_cancellation():
- stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
- logger.info(f"Agent execution cancelled for run {self.current_run_id}")
- break
+ # Check for job cancellation at the start of each step
+ if await self._check_run_cancellation():
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+ logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+ break

- step_id = generate_step_id()
- step_start = get_utc_timestamp_ns()
- agent_step_span = tracer.start_span("agent_step", start_time=step_start)
- agent_step_span.set_attributes({"step_id": step_id})
+ step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})

- step_progression = StepProgression.START
- should_continue = False
- step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
+ step_progression = StepProgression.START
+ should_continue = False
+ step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking

- # Create step early with PENDING status
- logged_step = await self.step_manager.log_step_async(
- actor=self.actor,
- agent_id=agent_state.id,
- provider_name=agent_state.llm_config.model_endpoint_type,
- provider_category=agent_state.llm_config.provider_category or "base",
- model=agent_state.llm_config.model,
- model_endpoint=agent_state.llm_config.model_endpoint,
- context_window_limit=agent_state.llm_config.context_window,
- usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
- provider_id=None,
- job_id=run_id if run_id else self.current_run_id,
- step_id=step_id,
- project_id=agent_state.project_id,
- status=StepStatus.PENDING,
- )
- # Only use step_id in messages if step was actually created
- effective_step_id = step_id if logged_step else None
+ # Create step early with PENDING status
+ logged_step = await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=run_id if run_id else self.current_run_id,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ status=StepStatus.PENDING,
+ )
+ # Only use step_id in messages if step was actually created
+ effective_step_id = step_id if logged_step else None

- try:
- request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm(
+ try:
+ (
+ request_data,
+ response_data,
+ current_in_context_messages,
+ new_in_context_messages,
+ valid_tool_names,
+ ) = await self._build_and_request_from_llm(
  current_in_context_messages,
  new_in_context_messages,
  agent_state,
@@ -576,180 +663,193 @@ class LettaAgent(BaseAgent):
  agent_step_span,
  step_metrics,
  )
- )
- in_context_messages = current_in_context_messages + new_in_context_messages
-
- step_progression = StepProgression.RESPONSE_RECEIVED
- log_event("agent.step.llm_response.received") # [3^]
-
- response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+ in_context_messages = current_in_context_messages + new_in_context_messages

- usage.step_count += 1
- usage.completion_tokens += response.usage.completion_tokens
- usage.prompt_tokens += response.usage.prompt_tokens
- usage.total_tokens += response.usage.total_tokens
- usage.run_ids = [run_id] if run_id else None
- MetricRegistry().message_output_tokens.record(
- response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
+ step_progression = StepProgression.RESPONSE_RECEIVED
+ log_event("agent.step.llm_response.received") # [3^]

- if not response.choices[0].message.tool_calls:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
- raise ValueError("No tool calls found in response, model must make a tool call")
- tool_call = response.choices[0].message.tool_calls[0]
- if response.choices[0].message.reasoning_content:
- reasoning = [
- ReasoningContent(
- reasoning=response.choices[0].message.reasoning_content,
- is_native=True,
- signature=response.choices[0].message.reasoning_content_signature,
+ try:
+ response = llm_client.convert_response_to_chat_completion(
+ response_data, in_context_messages, agent_state.llm_config
  )
- ]
- elif response.choices[0].message.content:
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
- elif response.choices[0].message.omitted_reasoning_content:
- reasoning = [OmittedReasoningContent()]
- else:
- self.logger.info("No reasoning content found.")
- reasoning = None
-
- persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
- agent_state,
- tool_rules_solver,
- response.usage,
- reasoning_content=reasoning,
- step_id=effective_step_id,
- initial_messages=initial_messages,
- agent_step_span=agent_step_span,
- is_final_step=(i == max_steps - 1),
- run_id=run_id,
- step_metrics=step_metrics,
- )
- step_progression = StepProgression.STEP_LOGGED
-
- # Update step with actual usage now that we have it (if step was created)
- if logged_step:
- await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
-
- new_message_idx = len(initial_messages) if initial_messages else 0
- self.response_messages.extend(persisted_messages[new_message_idx:])
- new_in_context_messages.extend(persisted_messages[new_message_idx:])
-
- initial_messages = None
- log_event("agent.step.llm_response.processed") # [4^]
-
- # log step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
-
- # Log LLM Trace
- if settings.track_provider_trace:
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json=response_data,
- step_id=step_id, # Use original step_id for telemetry
- organization_id=self.actor.organization_id,
- ),
+ except ValueError as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+ raise e
+
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+ usage.run_ids = [run_id] if run_id else None
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
  )
- step_progression = StepProgression.LOGGED_TRACE

- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
- step_progression = StepProgression.FINISHED
+ if not response.choices[0].message.tool_calls:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
+ )
+ ]
+ elif response.choices[0].message.content:
+ reasoning = [
+ TextContent(text=response.choices[0].message.content)
+ ] # reasoning placed into content for legacy reasons
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
+ else:
+ self.logger.info("No reasoning content found.")
+ reasoning = None

- # Record step metrics for successful completion
- if logged_step and step_metrics:
- # Set the step_ns that was already calculated
- step_metrics.step_ns = step_ns
- await self._record_step_metrics(
- step_id=step_id,
- agent_state=agent_state,
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
+ step_id=effective_step_id,
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ run_id=run_id,
  step_metrics=step_metrics,
- job_id=run_id if run_id else self.current_run_id,
  )
-
- except Exception as e:
- # Handle any unexpected errors during step processing
- self.logger.error(f"Error during step processing: {e}")
- job_update_metadata = {"error": str(e)}
-
- # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
- if not stop_reason:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
- self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
- elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
- raise ValueError(f"Invalid Stop Reason: {stop_reason}")
- raise
-
- # Update step if it needs to be updated
- finally:
- if step_progression == StepProgression.FINISHED and should_continue:
- continue
-
- self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
- self.logger.info("Running final update. Step Progression: %s", step_progression)
- try:
- if step_progression == StepProgression.FINISHED and not should_continue:
- # Successfully completed - update with final usage and stop reason
- if stop_reason is None:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
- if logged_step:
- await self.step_manager.update_step_success_async(self.actor, step_id, usage, stop_reason)
- break
-
- # Handle error cases
- if step_progression < StepProgression.STEP_LOGGED:
- # Error occurred before step was fully logged
- import traceback
-
- if logged_step:
- await self.step_manager.update_step_error_async(
- actor=self.actor,
+ step_progression = StepProgression.STEP_LOGGED
+
+ # Update step with actual usage now that we have it (if step was created)
+ if logged_step:
+ await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
+
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+ initial_messages = None
+ log_event("agent.step.llm_response.processed") # [4^]
+
+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # Log LLM Trace
+ if settings.track_provider_trace:
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
  step_id=step_id, # Use original step_id for telemetry
- error_type=type(e).__name__ if "e" in locals() else "Unknown",
- error_message=str(e) if "e" in locals() else "Unknown error",
- error_traceback=traceback.format_exc(),
- stop_reason=stop_reason,
- )
-
- if step_progression <= StepProgression.RESPONSE_RECEIVED:
- # TODO (cliandy): persist response if we get it back
- if settings.track_errored_messages and initial_messages:
- for message in initial_messages:
- message.is_err = True
- message.step_id = effective_step_id
- await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
- elif step_progression <= StepProgression.LOGGED_TRACE:
- if stop_reason is None:
- self.logger.error("Error in step after logging step")
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- if logged_step:
- await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
- else:
- self.logger.error("Invalid StepProgression value")
+ organization_id=self.actor.organization_id,
+ ),
+ )
+ step_progression = StepProgression.LOGGED_TRACE

- if settings.track_stop_reason:
- await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED

- # Record partial step metrics on failure (capture whatever timing data we have)
- if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
- # Calculate total step time up to the failure point
- step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ # Record step metrics for successful completion
+ if logged_step and step_metrics:
+ # Set the step_ns that was already calculated
+ step_metrics.step_ns = step_ns
  await self._record_step_metrics(
  step_id=step_id,
  agent_state=agent_state,
  step_metrics=step_metrics,
- job_id=locals().get("run_id", self.current_run_id),
+ job_id=run_id if run_id else self.current_run_id,
  )

  except Exception as e:
- self.logger.error("Failed to update step: %s", e)
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+ job_update_metadata = {"error": str(e)}
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (
+ StopReasonType.no_tool_call,
+ StopReasonType.invalid_tool_call,
+ StopReasonType.invalid_llm_response,
+ ):
+ self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
+ raise
+
+ # Update step if it needs to be updated
+ finally:
+ if step_progression == StepProgression.FINISHED and should_continue:
+ continue
+
+ self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression == StepProgression.FINISHED and not should_continue:
+ # Successfully completed - update with final usage and stop reason
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ if logged_step:
+ await self.step_manager.update_step_success_async(self.actor, step_id, usage, stop_reason)
+ break
+
+ # Handle error cases
+ if step_progression < StepProgression.STEP_LOGGED:
+ # Error occurred before step was fully logged
+ import traceback
+
+ if logged_step:
+ await self.step_manager.update_step_error_async(
+ actor=self.actor,
+ step_id=step_id, # Use original step_id for telemetry
+ error_type=type(e).__name__ if "e" in locals() else "Unknown",
+ error_message=str(e) if "e" in locals() else "Unknown error",
+ error_traceback=traceback.format_exc(),
+ stop_reason=stop_reason,
+ )
+
+ if step_progression <= StepProgression.RESPONSE_RECEIVED:
+ # TODO (cliandy): persist response if we get it back
+ if settings.track_errored_messages and initial_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = effective_step_id
+ await self.message_manager.create_many_messages_async(
+ initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ )
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+
+ if settings.track_stop_reason:
+ await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+
+ # Record partial step metrics on failure (capture whatever timing data we have)
+ if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+ # Calculate total step time up to the failure point
+ step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ await self._record_step_metrics(
+ step_id=step_id,
+ agent_state=agent_state,
+ step_metrics=step_metrics,
+ job_id=locals().get("run_id", self.current_run_id),
+ )
+
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)

  if not should_continue:
  break
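A recurring change in both step variants above: convert_response_to_chat_completion is now wrapped so that a malformed provider payload is tagged with the new invalid_llm_response stop reason before the exception propagates, instead of falling through to the generic error reason. A toy version of that mapping (only the stop-reason value comes from this diff; the rest is illustrative):

def convert(payload: dict) -> dict:
    # Stand-in for llm_client.convert_response_to_chat_completion.
    if "choices" not in payload:
        raise ValueError("malformed provider response")
    return payload

def step_once(payload: dict) -> dict:
    stop_reason = None
    try:
        response = convert(payload)
    except ValueError:
        stop_reason = "invalid_llm_response"  # StopReasonType.invalid_llm_response
        raise
    finally:
        if stop_reason is not None:
            print(f"stop reason recorded: {stop_reason}")
    return response

print(step_once({"choices": []}))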
@@ -783,7 +883,7 @@ class LettaAgent(BaseAgent):
  @trace_method
  async def step_stream(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: int | None = None,
@@ -806,6 +906,7 @@ class LettaAgent(BaseAgent):
  input_messages, agent_state, self.message_manager, self.actor
  )
  initial_messages = new_in_context_messages
+ in_context_messages = current_in_context_messages

  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
@@ -822,219 +923,30 @@ class LettaAgent(BaseAgent):
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
         for i in range(max_steps):
-            step_id = generate_step_id()
-            # Check for job cancellation at the start of each step
-            if await self._check_run_cancellation():
-                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
-                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
-                yield f"data: {stop_reason.model_dump_json()}\n\n"
-                break
-
-            step_start = get_utc_timestamp_ns()
-            agent_step_span = tracer.start_span("agent_step", start_time=step_start)
-            agent_step_span.set_attributes({"step_id": step_id})
-
-            step_progression = StepProgression.START
-            should_continue = False
-            step_metrics = StepMetrics(id=step_id)  # Initialize metrics tracking
-
-            # Create step early with PENDING status
-            logged_step = await self.step_manager.log_step_async(
-                actor=self.actor,
-                agent_id=agent_state.id,
-                provider_name=agent_state.llm_config.model_endpoint_type,
-                provider_category=agent_state.llm_config.provider_category or "base",
-                model=agent_state.llm_config.model,
-                model_endpoint=agent_state.llm_config.model_endpoint,
-                context_window_limit=agent_state.llm_config.context_window,
-                usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
-                provider_id=None,
-                job_id=self.current_run_id if self.current_run_id else None,
-                step_id=step_id,
-                project_id=agent_state.project_id,
-                status=StepStatus.PENDING,
-            )
-            # Only use step_id in messages if step was actually created
-            effective_step_id = step_id if logged_step else None
-
-            try:
-                (
-                    request_data,
-                    stream,
-                    current_in_context_messages,
-                    new_in_context_messages,
-                    valid_tool_names,
-                    provider_request_start_timestamp_ns,
-                ) = await self._build_and_request_from_llm_streaming(
-                    first_chunk,
-                    agent_step_span,
-                    request_start_timestamp_ns,
-                    current_in_context_messages,
-                    new_in_context_messages,
-                    agent_state,
-                    llm_client,
-                    tool_rules_solver,
-                )
-
-                step_progression = StepProgression.STREAM_RECEIVED
-                log_event("agent.stream.llm_response.received")  # [3^]
-
-                # TODO: THIS IS INCREDIBLY UGLY
-                # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
-                if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
-                    interface = AnthropicStreamingInterface(
-                        use_assistant_message=use_assistant_message,
-                        put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
-                    )
-                elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
-                    interface = OpenAIStreamingInterface(
-                        use_assistant_message=use_assistant_message,
-                        is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
-                        messages=current_in_context_messages + new_in_context_messages,
-                        tools=request_data.get("tools", []),
-                    )
-                else:
-                    raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
-
-                async for chunk in interface.process(
-                    stream,
-                    ttft_span=request_span,
-                ):
-                    # Measure TTFT (trace, metric, and db). This should be consolidated.
-                    if first_chunk and request_span is not None:
-                        now = get_utc_timestamp_ns()
-                        ttft_ns = now - request_start_timestamp_ns
-
-                        request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
-                        metric_attributes = get_ctx_attributes()
-                        metric_attributes["model.name"] = agent_state.llm_config.model
-                        MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
-
-                        if self.current_run_id and self.job_manager:
-                            await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
-
-                        first_chunk = False
-
-                    if include_return_message_types is None or chunk.message_type in include_return_message_types:
-                        # filter down returned data
-                        yield f"data: {chunk.model_dump_json()}\n\n"
-
-                stream_end_time_ns = get_utc_timestamp_ns()
-
-                # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
-                if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
-                    logger.warning(
-                        f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
-                    )
-                    interface.input_tokens = interface.fallback_input_tokens
-                    interface.output_tokens = interface.fallback_output_tokens
-
-                usage.step_count += 1
-                usage.completion_tokens += interface.output_tokens
-                usage.prompt_tokens += interface.input_tokens
-                usage.total_tokens += interface.input_tokens + interface.output_tokens
-                MetricRegistry().message_output_tokens.record(
-                    usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
-                )
-
-                # log LLM request time
-                llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
-                step_metrics.llm_request_ns = llm_request_ns
-
-                llm_request_ms = ns_to_ms(llm_request_ns)
-                agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
-                MetricRegistry().llm_execution_time_ms_histogram.record(
-                    llm_request_ms,
-                    dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
-                )
-
-                # Process resulting stream content
-                try:
-                    tool_call = interface.get_tool_call_object()
-                except ValueError as e:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
-                    raise e
-                except Exception as e:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
-                    raise e
-                reasoning_content = interface.get_reasoning_content()
+            if in_context_messages[-1].role == "approval":
+                approval_request_message = in_context_messages[-1]
+                step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
                 persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
-                    tool_call,
-                    valid_tool_names,
+                    approval_request_message.tool_calls[0],
+                    [],  # TODO: update this
                     agent_state,
                     tool_rules_solver,
-                    UsageStatistics(
-                        completion_tokens=usage.completion_tokens,
-                        prompt_tokens=usage.prompt_tokens,
-                        total_tokens=usage.total_tokens,
-                    ),
-                    reasoning_content=reasoning_content,
-                    pre_computed_assistant_message_id=interface.letta_message_id,
-                    step_id=effective_step_id,
-                    initial_messages=initial_messages,
-                    agent_step_span=agent_step_span,
+                    usage,
+                    reasoning_content=approval_request_message.content,
+                    step_id=approval_request_message.step_id,
+                    initial_messages=new_in_context_messages,
                     is_final_step=(i == max_steps - 1),
                     step_metrics=step_metrics,
+                    run_id=self.current_run_id,
+                    is_approval=input_messages[0].approve,
+                    is_denial=input_messages[0].approve == False,
+                    denial_reason=input_messages[0].reason,
                 )
-                step_progression = StepProgression.STEP_LOGGED
-
-                # Update step with actual usage now that we have it (if step was created)
-                if logged_step:
-                    await self.step_manager.update_step_success_async(
-                        self.actor,
-                        step_id,
-                        UsageStatistics(
-                            completion_tokens=usage.completion_tokens,
-                            prompt_tokens=usage.prompt_tokens,
-                            total_tokens=usage.total_tokens,
-                        ),
-                        stop_reason,
-                    )
-
                 new_message_idx = len(initial_messages) if initial_messages else 0
                 self.response_messages.extend(persisted_messages[new_message_idx:])
                 new_in_context_messages.extend(persisted_messages[new_message_idx:])
-
                 initial_messages = None
-
-                # log total step time
-                now = get_utc_timestamp_ns()
-                step_ns = now - step_start
-                agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
-                agent_step_span.end()
-
-                # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
-                # log_event("agent.stream.llm_response.processed")  # [4^]
-
-                # Log LLM Trace
-                # We are piecing together the streamed response here.
-                # Content here does not match the actual response schema as streams come in chunks.
-                if settings.track_provider_trace:
-                    await self.telemetry_manager.create_provider_trace_async(
-                        actor=self.actor,
-                        provider_trace_create=ProviderTraceCreate(
-                            request_json=request_data,
-                            response_json={
-                                "content": {
-                                    "tool_call": tool_call.model_dump_json(),
-                                    "reasoning": [content.model_dump_json() for content in reasoning_content],
-                                },
-                                "id": interface.message_id,
-                                "model": interface.model,
-                                "role": "assistant",
-                                # "stop_reason": "",
-                                # "stop_sequence": None,
-                                "type": "message",
-                                "usage": {
-                                    "input_tokens": usage.prompt_tokens,
-                                    "output_tokens": usage.completion_tokens,
-                                },
-                            },
-                            step_id=step_id,  # Use original step_id for telemetry
-                            organization_id=self.actor.organization_id,
-                        ),
-                    )
-                step_progression = StepProgression.LOGGED_TRACE
+                in_context_messages = current_in_context_messages + new_in_context_messages
 
                 # yields tool response as this is handled from Letta and not the response from the LLM provider
                 tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
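
The branch above resumes a paused step: when the newest in-context message is an approval request, the loop skips the LLM call and replays that message's recorded tool call through `_handle_ai_response`, flagging it as an approval or denial. A simplified, self-contained sketch of the detection; the real `Message` objects are richer than this stand-in:

```python
from dataclasses import dataclass, field

@dataclass
class Msg:
    role: str
    step_id: str | None = None
    tool_calls: list = field(default_factory=list)

def pending_approval(in_context_messages: list[Msg]) -> Msg | None:
    # The agent previously persisted an approval request and stopped; a new
    # request resumes from that message instead of issuing a fresh LLM step.
    last = in_context_messages[-1] if in_context_messages else None
    return last if last is not None and last.role == "approval" else None

history = [Msg("user"), Msg("approval", step_id="step-1", tool_calls=["call-1"])]
assert pending_approval(history) is history[-1]
```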
@@ -1042,103 +954,238 @@ class LettaAgent(BaseAgent):
                 # Apply message type filtering if specified
                 if include_return_message_types is None or tool_return.message_type in include_return_message_types:
                     yield f"data: {tool_return.model_dump_json()}\n\n"
+            else:
+                step_id = generate_step_id()
+                # Check for job cancellation at the start of each step
+                if await self._check_run_cancellation():
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                    logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                    yield f"data: {stop_reason.model_dump_json()}\n\n"
+                    break
+
+                step_start = get_utc_timestamp_ns()
+                agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+                agent_step_span.set_attributes({"step_id": step_id})
+
+                step_progression = StepProgression.START
+                should_continue = False
+                step_metrics = StepMetrics(id=step_id)  # Initialize metrics tracking
+
+                # Create step early with PENDING status
+                logged_step = await self.step_manager.log_step_async(
+                    actor=self.actor,
+                    agent_id=agent_state.id,
+                    provider_name=agent_state.llm_config.model_endpoint_type,
+                    provider_category=agent_state.llm_config.provider_category or "base",
+                    model=agent_state.llm_config.model,
+                    model_endpoint=agent_state.llm_config.model_endpoint,
+                    context_window_limit=agent_state.llm_config.context_window,
+                    usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+                    provider_id=None,
+                    job_id=self.current_run_id if self.current_run_id else None,
+                    step_id=step_id,
+                    project_id=agent_state.project_id,
+                    status=StepStatus.PENDING,
+                )
+                # Only use step_id in messages if step was actually created
+                effective_step_id = step_id if logged_step else None
 
-            # TODO (cliandy): consolidate and expand with trace
-            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
-            step_progression = StepProgression.FINISHED
+                try:
+                    (
+                        request_data,
+                        stream,
+                        current_in_context_messages,
+                        new_in_context_messages,
+                        valid_tool_names,
+                        provider_request_start_timestamp_ns,
+                    ) = await self._build_and_request_from_llm_streaming(
+                        first_chunk,
+                        agent_step_span,
+                        request_start_timestamp_ns,
+                        current_in_context_messages,
+                        new_in_context_messages,
+                        agent_state,
+                        llm_client,
+                        tool_rules_solver,
+                    )
 
-            # Record step metrics for successful completion
-            if logged_step and step_metrics:
-                try:
-                    # Set the step_ns that was already calculated
-                    step_metrics.step_ns = step_ns
+                    step_progression = StepProgression.STREAM_RECEIVED
+                    log_event("agent.stream.llm_response.received")  # [3^]
+
+                    # TODO: THIS IS INCREDIBLY UGLY
+                    # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
+                    if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+                        interface = AnthropicStreamingInterface(
+                            use_assistant_message=use_assistant_message,
+                            put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                        )
+                    elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
+                        interface = OpenAIStreamingInterface(
+                            use_assistant_message=use_assistant_message,
+                            is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
+                            messages=current_in_context_messages + new_in_context_messages,
+                            tools=request_data.get("tools", []),
+                            put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                        )
+                    else:
+                        raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
 
-                    # Get context attributes for project and template IDs
-                    ctx_attrs = get_ctx_attributes()
+                    async for chunk in interface.process(
+                        stream,
+                        ttft_span=request_span,
+                    ):
+                        # Measure TTFT (trace, metric, and db). This should be consolidated.
+                        if first_chunk and request_span is not None:
+                            now = get_utc_timestamp_ns()
+                            ttft_ns = now - request_start_timestamp_ns
 
-                    await self._record_step_metrics(
-                        step_id=step_id,
-                        agent_state=agent_state,
-                        step_metrics=step_metrics,
-                        ctx_attrs=ctx_attrs,
-                        job_id=self.current_run_id,
+                            request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+                            metric_attributes = get_ctx_attributes()
+                            metric_attributes["model.name"] = agent_state.llm_config.model
+                            MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
+
+                            if self.current_run_id and self.job_manager:
+                                await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
+
+                            first_chunk = False
+
+                        if include_return_message_types is None or chunk.message_type in include_return_message_types:
+                            # filter down returned data
+                            yield f"data: {chunk.model_dump_json()}\n\n"
+
+                    stream_end_time_ns = get_utc_timestamp_ns()
+
+                    # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
+                    if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
+                        logger.warning(
+                            f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
                         )
-                except Exception as metrics_error:
-                    self.logger.warning(f"Failed to record step metrics: {metrics_error}")
+                        interface.input_tokens = interface.fallback_input_tokens
+                        interface.output_tokens = interface.fallback_output_tokens
+
+                    usage.step_count += 1
+                    usage.completion_tokens += interface.output_tokens
+                    usage.prompt_tokens += interface.input_tokens
+                    usage.total_tokens += interface.input_tokens + interface.output_tokens
+                    MetricRegistry().message_output_tokens.record(
+                        usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+                    )
 
-            except Exception as e:
-                # Handle any unexpected errors during step processing
-                self.logger.error(f"Error during step processing: {e}")
-                job_update_metadata = {"error": str(e)}
-
-                # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
-                if not stop_reason:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
-                elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
-                    self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
-                elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
-                    raise ValueError(f"Invalid Stop Reason: {stop_reason}")
-
-                # Send error stop reason to client and re-raise with expected response code
-                yield f"data: {stop_reason.model_dump_json()}\n\n", 500
-                raise
-
-            # Update step if it needs to be updated
-            finally:
-                if step_progression == StepProgression.FINISHED and should_continue:
-                    continue
-
-                self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
-                self.logger.info("Running final update. Step Progression: %s", step_progression)
-                try:
-                    if step_progression == StepProgression.FINISHED and not should_continue:
-                        # Successfully completed - update with final usage and stop reason
-                        if stop_reason is None:
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-                        # Note: step already updated with success status after _handle_ai_response
-                        if logged_step:
-                            await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                        break
-
-                    # Handle error cases
-                    if step_progression < StepProgression.STEP_LOGGED:
-                        # Error occurred before step was fully logged
-                        import traceback
-
-                        if logged_step:
-                            await self.step_manager.update_step_error_async(
-                                actor=self.actor,
+                    # log LLM request time
+                    llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
+                    step_metrics.llm_request_ns = llm_request_ns
+
+                    llm_request_ms = ns_to_ms(llm_request_ns)
+                    agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+                    MetricRegistry().llm_execution_time_ms_histogram.record(
+                        llm_request_ms,
+                        dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+                    )
+
+                    # Process resulting stream content
+                    try:
+                        tool_call = interface.get_tool_call_object()
+                    except ValueError as e:
+                        stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+                        raise e
+                    except Exception as e:
+                        stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+                        raise e
+                    reasoning_content = interface.get_reasoning_content()
+                    persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+                        tool_call,
+                        valid_tool_names,
+                        agent_state,
+                        tool_rules_solver,
+                        UsageStatistics(
+                            completion_tokens=usage.completion_tokens,
+                            prompt_tokens=usage.prompt_tokens,
+                            total_tokens=usage.total_tokens,
+                        ),
+                        reasoning_content=reasoning_content,
+                        pre_computed_assistant_message_id=interface.letta_message_id,
+                        step_id=effective_step_id,
+                        initial_messages=initial_messages,
+                        agent_step_span=agent_step_span,
+                        is_final_step=(i == max_steps - 1),
+                        step_metrics=step_metrics,
+                    )
+                    step_progression = StepProgression.STEP_LOGGED
+
+                    # Update step with actual usage now that we have it (if step was created)
+                    if logged_step:
+                        await self.step_manager.update_step_success_async(
+                            self.actor,
+                            step_id,
+                            UsageStatistics(
+                                completion_tokens=usage.completion_tokens,
+                                prompt_tokens=usage.prompt_tokens,
+                                total_tokens=usage.total_tokens,
+                            ),
+                            stop_reason,
+                        )
+
+                    new_message_idx = len(initial_messages) if initial_messages else 0
+                    self.response_messages.extend(persisted_messages[new_message_idx:])
+                    new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+                    initial_messages = None
+
+                    # log total step time
+                    now = get_utc_timestamp_ns()
+                    step_ns = now - step_start
+                    agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+                    agent_step_span.end()
+
+                    # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
+                    # log_event("agent.stream.llm_response.processed")  # [4^]
+
+                    # Log LLM Trace
+                    # We are piecing together the streamed response here.
+                    # Content here does not match the actual response schema as streams come in chunks.
+                    if settings.track_provider_trace:
+                        await self.telemetry_manager.create_provider_trace_async(
+                            actor=self.actor,
+                            provider_trace_create=ProviderTraceCreate(
+                                request_json=request_data,
+                                response_json={
+                                    "content": {
+                                        "tool_call": tool_call.model_dump_json(),
+                                        "reasoning": [content.model_dump_json() for content in reasoning_content],
+                                    },
+                                    "id": interface.message_id,
+                                    "model": interface.model,
+                                    "role": "assistant",
+                                    # "stop_reason": "",
+                                    # "stop_sequence": None,
+                                    "type": "message",
+                                    "usage": {
+                                        "input_tokens": usage.prompt_tokens,
+                                        "output_tokens": usage.completion_tokens,
+                                    },
+                                },
                                 step_id=step_id,  # Use original step_id for telemetry
-                                error_type=type(e).__name__ if "e" in locals() else "Unknown",
-                                error_message=str(e) if "e" in locals() else "Unknown error",
-                                error_traceback=traceback.format_exc(),
-                                stop_reason=stop_reason,
-                            )
+                                organization_id=self.actor.organization_id,
+                            ),
+                        )
+                    step_progression = StepProgression.LOGGED_TRACE
 
-                    if step_progression <= StepProgression.STREAM_RECEIVED:
-                        if first_chunk and settings.track_errored_messages and initial_messages:
-                            for message in initial_messages:
-                                message.is_err = True
-                                message.step_id = effective_step_id
-                            await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
-                    elif step_progression <= StepProgression.LOGGED_TRACE:
-                        if stop_reason is None:
-                            self.logger.error("Error in step after logging step")
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
-                        if logged_step:
-                            await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                    else:
-                        self.logger.error("Invalid StepProgression value")
+                    # yields tool response as this is handled from Letta and not the response from the LLM provider
+                    tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+                    if not (use_assistant_message and tool_return.name == "send_message"):
+                        # Apply message type filtering if specified
+                        if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+                            yield f"data: {tool_return.model_dump_json()}\n\n"
 
-                    # Do tracking for failure cases. Can consolidate with success conditions later.
-                    if settings.track_stop_reason:
-                        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+                    # TODO (cliandy): consolidate and expand with trace
+                    MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+                    step_progression = StepProgression.FINISHED
 
-                    # Record partial step metrics on failure (capture whatever timing data we have)
-                    if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+                    # Record step metrics for successful completion
+                    if logged_step and step_metrics:
                         try:
-                            # Calculate total step time up to the failure point
-                            step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+                            # Set the step_ns that was already calculated
+                            step_metrics.step_ns = step_ns
 
                             # Get context attributes for project and template IDs
                             ctx_attrs = get_ctx_attributes()
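
The streaming loop above measures time-to-first-token exactly once, on the first chunk, relative to the request start. A minimal sketch of the same bookkeeping with a stand-in clock (`time.monotonic_ns` here; the package uses its own `get_utc_timestamp_ns` helper):

```python
import time

request_start_ns = time.monotonic_ns()
first_chunk = True
ttft_ms = None

for chunk in ("data: {}\n\n", "data: {}\n\n"):  # stand-in for the SSE stream
    if first_chunk:
        # Only the first chunk closes the TTFT window.
        ttft_ms = (time.monotonic_ns() - request_start_ns) / 1e6
        first_chunk = False
    # ...the real loop yields the chunk to the client here...

print(f"ttft_ms={ttft_ms:.3f}")
```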
@@ -1148,16 +1195,109 @@ class LettaAgent(BaseAgent):
                                 agent_state=agent_state,
                                 step_metrics=step_metrics,
                                 ctx_attrs=ctx_attrs,
-                                job_id=locals().get("run_id", self.current_run_id),
+                                job_id=self.current_run_id,
                             )
                         except Exception as metrics_error:
                             self.logger.warning(f"Failed to record step metrics: {metrics_error}")
 
                 except Exception as e:
-                    self.logger.error("Failed to update step: %s", e)
+                    # Handle any unexpected errors during step processing
+                    self.logger.error(f"Error during step processing: {e}")
+                    job_update_metadata = {"error": str(e)}
+
+                    # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+                    if not stop_reason:
+                        stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+                    elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+                        self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+                    elif stop_reason.stop_reason not in (
+                        StopReasonType.no_tool_call,
+                        StopReasonType.invalid_tool_call,
+                        StopReasonType.invalid_llm_response,
+                    ):
+                        self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
+
+                    # Send error stop reason to client and re-raise with expected response code
+                    yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+                    raise
 
-        if not should_continue:
-            break
+                # Update step if it needs to be updated
+                finally:
+                    if step_progression == StepProgression.FINISHED and should_continue:
+                        continue
+
+                    self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
+                    self.logger.info("Running final update. Step Progression: %s", step_progression)
+                    try:
+                        if step_progression == StepProgression.FINISHED and not should_continue:
+                            # Successfully completed - update with final usage and stop reason
+                            if stop_reason is None:
+                                stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                            # Note: step already updated with success status after _handle_ai_response
+                            if logged_step:
+                                await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                            break
+
+                        # Handle error cases
+                        if step_progression < StepProgression.STEP_LOGGED:
+                            # Error occurred before step was fully logged
+                            import traceback
+
+                            if logged_step:
+                                await self.step_manager.update_step_error_async(
+                                    actor=self.actor,
+                                    step_id=step_id,  # Use original step_id for telemetry
+                                    error_type=type(e).__name__ if "e" in locals() else "Unknown",
+                                    error_message=str(e) if "e" in locals() else "Unknown error",
+                                    error_traceback=traceback.format_exc(),
+                                    stop_reason=stop_reason,
+                                )
+
+                        if step_progression <= StepProgression.STREAM_RECEIVED:
+                            if first_chunk and settings.track_errored_messages and initial_messages:
+                                for message in initial_messages:
+                                    message.is_err = True
+                                    message.step_id = effective_step_id
+                                await self.message_manager.create_many_messages_async(
+                                    initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+                                )
+                        elif step_progression <= StepProgression.LOGGED_TRACE:
+                            if stop_reason is None:
+                                self.logger.error("Error in step after logging step")
+                                stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+                            if logged_step:
+                                await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                        else:
+                            self.logger.error("Invalid StepProgression value")
+
+                        # Do tracking for failure cases. Can consolidate with success conditions later.
+                        if settings.track_stop_reason:
+                            await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+
+                        # Record partial step metrics on failure (capture whatever timing data we have)
+                        if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+                            try:
+                                # Calculate total step time up to the failure point
+                                step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+
+                                # Get context attributes for project and template IDs
+                                ctx_attrs = get_ctx_attributes()
+
+                                await self._record_step_metrics(
+                                    step_id=step_id,
+                                    agent_state=agent_state,
+                                    step_metrics=step_metrics,
+                                    ctx_attrs=ctx_attrs,
+                                    job_id=locals().get("run_id", self.current_run_id),
+                                )
+                            except Exception as metrics_error:
+                                self.logger.warning(f"Failed to record step metrics: {metrics_error}")
+
+                    except Exception as e:
+                        self.logger.error("Failed to update step: %s", e)
+
+            if not should_continue:
+                break
 
         # Extend the in context message ids
         if not agent_state.message_buffer_autoclear:
             await self._rebuild_context_window(
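
On failure the generator above yields a `(payload, status)` tuple (`yield f"data: ...", 500`) rather than a bare SSE string, so the HTTP layer can surface an error code even though streaming has already begun. A hedged sketch of how a consumer might distinguish the two cases; the consumer shape is assumed, not taken from the package:

```python
import asyncio
import json

def error_event(stop_reason: str) -> tuple[str, int]:
    # Mirrors the failure path: an SSE payload plus an HTTP status hint.
    return f"data: {json.dumps({'stop_reason': stop_reason})}\n\n", 500

async def stream():
    yield 'data: {"message_type": "assistant_message"}\n\n'
    yield error_event("error")

async def consume():
    async for item in stream():
        sse, status = item if isinstance(item, tuple) else (item, 200)
        print(status, sse.strip())

asyncio.run(consume())
```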
@@ -1494,14 +1634,46 @@ class LettaAgent(BaseAgent):
         is_final_step: bool | None = None,
         run_id: str | None = None,
         step_metrics: StepMetrics = None,
+        is_approval: bool | None = None,
+        is_denial: bool | None = None,
+        denial_reason: str | None = None,
     ) -> tuple[list[Message], bool, LettaStopReason | None]:
         """
         Handle the final AI response once streaming completes, execute / validate the
         tool call, decide whether we should keep stepping, and persist state.
         """
+        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+
+        if is_denial:
+            continue_stepping = True
+            stop_reason = None
+            tool_call_messages = create_letta_messages_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name="",
+                function_arguments={},
+                tool_execution_result=ToolExecutionResult(status="error"),
+                tool_call_id=tool_call_id,
+                function_call_success=False,
+                function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
+                timezone=agent_state.timezone,
+                actor=self.actor,
+                continue_stepping=continue_stepping,
+                heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
+                reasoning_content=None,
+                pre_computed_assistant_message_id=None,
+                step_id=step_id,
+                is_approval_response=True,
+            )
+            messages_to_persist = (initial_messages or []) + tool_call_messages
+            persisted_messages = await self.message_manager.create_many_messages_async(
+                messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
+            )
+            return persisted_messages, continue_stepping, stop_reason
+
         # 1. Parse and validate the tool-call envelope
         tool_call_name: str = tool_call.function.name
-        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+
         tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
         request_heartbeat: bool = _pop_heartbeat(tool_args)
         tool_args.pop(INNER_THOUGHTS_KWARG, None)
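
The `is_denial` fast path above never executes the tool: it fabricates a failed tool result whose response text carries the user's reason, persists it, and keeps stepping so the model can react. A small sketch of the synthesized strings; `NON_USER_MSG_PREFIX` exists in the package, but its literal value here is an assumption:

```python
NON_USER_MSG_PREFIX = "[system] "  # assumed literal for illustration

def denial_function_response(denial_reason: str | None) -> str:
    # What the model sees as the "tool output" for the denied call.
    return f"Error: request to call tool denied. User reason: {denial_reason}"

def denial_heartbeat() -> str:
    # Why the agent keeps stepping after a denial.
    return f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool."

print(denial_function_response("too risky"))
print(denial_heartbeat())
```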
@@ -1515,77 +1687,99 @@ class LettaAgent(BaseAgent):
             request_heartbeat=request_heartbeat,
         )
 
-        # 2. Execute the tool (or synthesize an error result if disallowed)
-        tool_rule_violated = tool_call_name not in valid_tool_names
-        if tool_rule_violated:
-            tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
-        else:
-            # Track tool execution time
-            tool_start_time = get_utc_timestamp_ns()
-            tool_execution_result = await self._execute_tool(
-                tool_name=tool_call_name,
-                tool_args=tool_args,
-                agent_state=agent_state,
-                agent_step_span=agent_step_span,
+        if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
+            approval_message = create_approval_request_message_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call_name,
+                function_arguments=tool_args,
+                tool_call_id=tool_call_id,
+                actor=self.actor,
+                continue_stepping=request_heartbeat,
+                reasoning_content=reasoning_content,
+                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                 step_id=step_id,
             )
-            tool_end_time = get_utc_timestamp_ns()
+            messages_to_persist = (initial_messages or []) + [approval_message]
+            continue_stepping = False
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
+        else:
+            # 2. Execute the tool (or synthesize an error result if disallowed)
+            tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
+            if tool_rule_violated:
+                tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
+            else:
+                # Track tool execution time
+                tool_start_time = get_utc_timestamp_ns()
+                tool_execution_result = await self._execute_tool(
+                    tool_name=tool_call_name,
+                    tool_args=tool_args,
+                    agent_state=agent_state,
+                    agent_step_span=agent_step_span,
+                    step_id=step_id,
+                )
+                tool_end_time = get_utc_timestamp_ns()
 
-            # Store tool execution time in metrics
-            step_metrics.tool_execution_ns = tool_end_time - tool_start_time
+                # Store tool execution time in metrics
+                step_metrics.tool_execution_ns = tool_end_time - tool_start_time
 
-        log_telemetry(
-            self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
-        )
+            log_telemetry(
+                self.logger,
+                "_handle_ai_response execute tool finish",
+                tool_execution_result=tool_execution_result,
+                tool_call_id=tool_call_id,
+            )
 
-        # 3. Prepare the function-response payload
-        truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
-        return_char_limit = next(
-            (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
-            None,
-        )
-        function_response_string = validate_function_response(
-            tool_execution_result.func_return,
-            return_char_limit=return_char_limit,
-            truncate=truncate,
-        )
-        self.last_function_response = package_function_response(
-            was_success=tool_execution_result.success_flag,
-            response_string=function_response_string,
-            timezone=agent_state.timezone,
-        )
+            # 3. Prepare the function-response payload
+            truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+            return_char_limit = next(
+                (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
+                None,
+            )
+            function_response_string = validate_function_response(
+                tool_execution_result.func_return,
+                return_char_limit=return_char_limit,
+                truncate=truncate,
+            )
+            self.last_function_response = package_function_response(
+                was_success=tool_execution_result.success_flag,
+                response_string=function_response_string,
+                timezone=agent_state.timezone,
+            )
 
-        # 4. Decide whether to keep stepping (<<< focal section simplified)
-        continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
-            agent_state=agent_state,
-            request_heartbeat=request_heartbeat,
-            tool_call_name=tool_call_name,
-            tool_rule_violated=tool_rule_violated,
-            tool_rules_solver=tool_rules_solver,
-            is_final_step=is_final_step,
-        )
+            # 4. Decide whether to keep stepping (focal section simplified)
+            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+                agent_state=agent_state,
+                request_heartbeat=request_heartbeat,
+                tool_call_name=tool_call_name,
+                tool_rule_violated=tool_rule_violated,
+                tool_rules_solver=tool_rules_solver,
+                is_final_step=is_final_step,
+            )
 
-        # 5. Create messages (step was already created at the beginning)
-        tool_call_messages = create_letta_messages_from_llm_response(
-            agent_id=agent_state.id,
-            model=agent_state.llm_config.model,
-            function_name=tool_call_name,
-            function_arguments=tool_args,
-            tool_execution_result=tool_execution_result,
-            tool_call_id=tool_call_id,
-            function_call_success=tool_execution_result.success_flag,
-            function_response=function_response_string,
-            timezone=agent_state.timezone,
-            actor=self.actor,
-            continue_stepping=continue_stepping,
-            heartbeat_reason=heartbeat_reason,
-            reasoning_content=reasoning_content,
-            pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-            step_id=step_id,
-        )
+            # 5. Create messages (step was already created at the beginning)
+            tool_call_messages = create_letta_messages_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call_name,
+                function_arguments=tool_args,
+                tool_execution_result=tool_execution_result,
+                tool_call_id=tool_call_id,
+                function_call_success=tool_execution_result.success_flag,
+                function_response=function_response_string,
+                timezone=agent_state.timezone,
+                actor=self.actor,
+                continue_stepping=continue_stepping,
+                heartbeat_reason=heartbeat_reason,
+                reasoning_content=reasoning_content,
+                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+                step_id=step_id,
+                is_approval_response=is_approval or is_denial,
+            )
+            messages_to_persist = (initial_messages or []) + tool_call_messages
 
         persisted_messages = await self.message_manager.create_many_messages_async(
-            (initial_messages or []) + tool_call_messages, actor=self.actor
+            messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
         )
 
         if run_id:
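
The gate above checks `tool_rules_solver.is_requires_approval_tool(...)` before execution: a flagged tool short-circuits into a persisted approval request and a `requires_approval` stop reason, unless the current call is already an approval response. A reduced sketch of that decision; the real `ToolRulesSolver` is rule-driven rather than a plain set:

```python
class ApprovalGate:
    def __init__(self, requires_approval: set[str]):
        self.requires_approval = requires_approval

    def is_requires_approval_tool(self, tool_name: str) -> bool:
        return tool_name in self.requires_approval

gate = ApprovalGate({"delete_archive"})
is_approval = False  # this turn is not itself an approval response

if not is_approval and gate.is_requires_approval_tool("delete_archive"):
    # The agent persists an approval request message and stops stepping.
    print("stop_reason=requires_approval")
```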
@@ -1606,7 +1800,6 @@ class LettaAgent(BaseAgent):
         tool_rules_solver: ToolRulesSolver,
         is_final_step: bool | None,
     ) -> tuple[bool, str | None, LettaStopReason | None]:
-
         continue_stepping = request_heartbeat
         heartbeat_reason: str | None = None
         stop_reason: LettaStopReason | None = None
@@ -1638,9 +1831,7 @@ class LettaAgent(BaseAgent):
             uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
             if not continue_stepping and uncalled:
                 continue_stepping = True
-                heartbeat_reason = (
-                    f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [" f"{', '.join(uncalled)}] to be called still."
-                )
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
 
                 stop_reason = None  # reset – we’re still going
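
The consolidated f-string above feeds the continuation rule: when required tools remain uncalled, the solver forces another step and explains why in the heartbeat reason. A direct, self-contained restatement of that rule (the `NON_USER_MSG_PREFIX` literal is an assumed placeholder):

```python
NON_USER_MSG_PREFIX = "[system] "  # assumed literal for illustration

def force_required_tools(continue_stepping: bool, uncalled: list[str]) -> tuple[bool, str | None]:
    heartbeat_reason = None
    if not continue_stepping and uncalled:
        # Required tools are still outstanding: keep stepping and say why.
        continue_stepping = True
        heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
    return continue_stepping, heartbeat_reason

print(force_required_tools(False, ["send_message"]))
```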