agnt5-0.2.8a6-cp310-abi3-macosx_11_0_arm64.whl → agnt5-0.2.8a8-cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



agnt5/worker.py CHANGED
@@ -14,6 +14,38 @@ from ._telemetry import setup_module_logger

  logger = setup_module_logger(__name__)

+
+ def _normalize_metadata(metadata: Dict[str, Any]) -> Dict[str, str]:
+ """
+ Convert metadata dictionary to Dict[str, str] for Rust FFI compatibility.
+
+ PyO3 requires HashMap<String, String>, but Python code may include booleans,
+ integers, or other types. This helper ensures all values are strings.
+
+ Args:
+ metadata: Dictionary with potentially mixed types
+
+ Returns:
+ Dictionary with all string values
+
+ Example:
+ >>> _normalize_metadata({"error": True, "count": 42, "msg": "hello"})
+ {"error": "true", "count": "42", "msg": "hello"}
+ """
+ normalized = {}
+ for key, value in metadata.items():
+ if isinstance(value, str):
+ normalized[key] = value
+ elif isinstance(value, bool):
+ # Convert bool to lowercase string for JSON compatibility
+ normalized[key] = str(value).lower()
+ elif value is None:
+ normalized[key] = ""
+ else:
+ # Convert any other type to string representation
+ normalized[key] = str(value)
+ return normalized
+
  # Context variable to store trace metadata for propagation to LM calls
  # This allows Rust LM layer to access traceparent without explicit parameter passing
  _trace_metadata: contextvars.ContextVar[Dict[str, str]] = contextvars.ContextVar(
@@ -455,11 +487,22 @@ class Worker:
  output_schema_str = json.dumps(config.output_schema) if config.output_schema else None
  metadata = config.metadata if config.metadata else {}

+ # Serialize retry and backoff policies
+ config_dict = {}
+ if config.retries:
+ config_dict["max_attempts"] = str(config.retries.max_attempts)
+ config_dict["initial_interval_ms"] = str(config.retries.initial_interval_ms)
+ config_dict["max_interval_ms"] = str(config.retries.max_interval_ms)
+
+ if config.backoff:
+ config_dict["backoff_type"] = config.backoff.type.value
+ config_dict["backoff_multiplier"] = str(config.backoff.multiplier)
+
  component_info = self._PyComponentInfo(
  name=config.name,
  component_type="function",
  metadata=metadata,
- config={},
+ config=config_dict,
  input_schema=input_schema_str,
  output_schema=output_schema_str,
  definition=None,
@@ -627,6 +670,30 @@ class Worker:

  return handle_message

+ def _extract_critical_metadata(self, request) -> Dict[str, str]:
+ """
+ Extract critical metadata from request that MUST be propagated to response.
+
+ This ensures journal events are written to the correct tenant partition
+ and can be properly replayed. Missing tenant_id causes catastrophic
+ event sourcing corruption where events are split across partitions.
+
+ Returns:
+ Dict[str, str]: Metadata with all values normalized to strings for Rust FFI
+ """
+ metadata = {}
+ if hasattr(request, 'metadata') and request.metadata:
+ # CRITICAL: Propagate tenant_id to prevent journal corruption
+ # Convert to string immediately to ensure Rust FFI compatibility
+ if "tenant_id" in request.metadata:
+ metadata["tenant_id"] = str(request.metadata["tenant_id"])
+ if "deployment_id" in request.metadata:
+ metadata["deployment_id"] = str(request.metadata["deployment_id"])
+
+ # CRITICAL: Normalize all metadata values to strings for Rust FFI (PyO3)
+ # PyO3 expects HashMap<String, String> and will fail with bool/int values
+ return _normalize_metadata(metadata)
+
  async def _execute_function(self, config, input_data: bytes, request):
  """Execute a function handler (supports both regular and streaming functions)."""
  import json
@@ -647,17 +714,33 @@ class Worker:
  _trace_metadata.set(dict(request.metadata))
  logger.debug(f"Trace metadata stored: traceparent={request.metadata.get('traceparent', 'N/A')}")

- # Create context with runtime_context for trace correlation
- ctx = Context(
+ # Extract attempt number from platform request (if provided)
+ platform_attempt = getattr(request, 'attempt', 0)
+
+ # Create FunctionContext with attempt number for retry tracking
+ # - If platform_attempt > 0: Platform is orchestrating retries
+ # - If platform_attempt == 0: First attempt (or no retry config)
+ from .function import FunctionContext
+ ctx = FunctionContext(
  run_id=f"{self.service_name}:{config.name}",
+ attempt=platform_attempt,
  runtime_context=request.runtime_context,
+ retry_policy=config.retries,
  )

+ # Set context in contextvar so get_current_context() and error handlers can access it
+ from .context import set_current_context, _current_context
+ token = set_current_context(ctx)
+
  # Execute function directly - Rust bridge handles tracing
  # Note: Removed Python-level span creation to avoid duplicate spans.
  # The Rust worker bridge (sdk-python/rust-src/worker.rs:413-659) already
  # creates a comprehensive OpenTelemetry span with all necessary attributes.
  # See DUPLICATE_SPANS_FIX.md for details.
+ #
+ # Note on retry handling:
+ # - If platform_attempt > 0: Platform is orchestrating retries, execute once
+ # - If platform_attempt == 0: Local retry loop in decorator wrapper handles retries
  if input_dict:
  result = config.handler(ctx, **input_dict)
  else:
@@ -688,6 +771,7 @@ class Worker:
  is_chunk=True,
  done=False,
  chunk_index=chunk_index,
+ attempt=platform_attempt,
  ))
  chunk_index += 1

@@ -702,6 +786,7 @@ class Worker:
  is_chunk=True,
  done=True,
  chunk_index=chunk_index,
+ attempt=platform_attempt,
  ))

  logger.debug(f"Streaming function produced {len(responses)} chunks")
@@ -714,34 +799,69 @@ class Worker:
  # Serialize result
  output_data = json.dumps(result).encode("utf-8")

+ # Extract critical metadata for journal event correlation
+ response_metadata = self._extract_critical_metadata(request)
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=True,
  output_data=output_data,
  state_update=None,
  error_message=None,
- metadata=None,
+ metadata=response_metadata if response_metadata else None,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=platform_attempt,
  )

  except Exception as e:
  # Include exception type for better error messages
  error_msg = f"{type(e).__name__}: {str(e)}"
- logger.error(f"Function execution failed: {error_msg}", exc_info=True)
+
+ # Capture full stack trace for telemetry
+ import traceback
+ stack_trace = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+
+ # Log with full traceback using ctx.logger to ensure run_id correlation
+ from .context import get_current_context
+ current_ctx = get_current_context()
+ error_logger = current_ctx.logger if current_ctx else logger
+ error_logger.error(f"Function execution failed: {error_msg}", exc_info=True)
+
+ # Store stack trace in metadata for observability
+ metadata = {
+ "error_type": type(e).__name__,
+ "stack_trace": stack_trace,
+ "error": True, # Boolean flag for error detection
+ }
+
+ # CRITICAL: Extract critical metadata (including tenant_id) for journal event correlation
+ # This ensures run.failed events are properly emitted by Worker Coordinator
+ critical_metadata = self._extract_critical_metadata(request)
+ metadata.update(critical_metadata)
+
+ # CRITICAL: Normalize metadata to ensure all values are strings (Rust FFI requirement)
+ # PyO3 expects HashMap<String, String>, but we may have booleans or other types
+ normalized_metadata = _normalize_metadata(metadata)
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=False,
  output_data=b"",
  state_update=None,
  error_message=error_msg,
- metadata=None,
+ metadata=normalized_metadata,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

+ finally:
+ # Always reset context to prevent leakage between executions
+ _current_context.reset(token)
+
  async def _execute_workflow(self, config, input_data: bytes, request):
  """Execute a workflow handler with automatic replay support."""
  import json
@@ -798,8 +918,35 @@ class Worker:
  user_response = request.metadata["user_response"]
  logger.info(f"▶️ Resuming workflow with user response: {user_response}")

- # Create WorkflowEntity for state management
- workflow_entity = WorkflowEntity(run_id=f"{self.service_name}:{config.name}")
+ # NEW: Check for agent resume (agent-level HITL)
+ agent_context = None
+ if hasattr(request, 'metadata') and request.metadata:
+ if "agent_context" in request.metadata:
+ agent_context_json = request.metadata["agent_context"]
+ try:
+ agent_context = json.loads(agent_context_json)
+ agent_name = agent_context.get("agent_name", "unknown")
+ iteration = agent_context.get("iteration", 0)
+ logger.info(
+ f"▶️ Resuming agent '{agent_name}' from iteration {iteration} "
+ f"with user response: {user_response}"
+ )
+ except json.JSONDecodeError:
+ logger.warning("Failed to parse agent_context from metadata")
+ agent_context = None
+
+ # Extract session_id and user_id from request for memory scoping
+ # Do this FIRST so we can pass to WorkflowEntity constructor
+ session_id = request.session_id if hasattr(request, 'session_id') and request.session_id else request.invocation_id
+ user_id = request.user_id if hasattr(request, 'user_id') and request.user_id else None
+
+ # Create WorkflowEntity for state management with memory scoping
+ # Entity key will be scoped based on priority: user_id > session_id > run_id
+ workflow_entity = WorkflowEntity(
+ run_id=request.invocation_id,
+ session_id=session_id,
+ user_id=user_id,
+ )

  # Load replay data into entity if provided
  if completed_steps:
@@ -822,21 +969,75 @@ class Worker:
  # Production mode - state is managed by Rust core
  logger.debug(f"Initial state will be loaded from platform (production mode)")

- # Create WorkflowContext with entity and runtime_context for trace correlation
+ # Create checkpoint callback for real-time streaming
+ def checkpoint_callback(checkpoint: dict) -> None:
+ """Send checkpoint to Rust worker queue."""
+ try:
+ # Extract critical metadata for checkpoint routing
+ metadata = self._extract_critical_metadata(request)
+
+ # DEBUG: Log metadata types for troubleshooting PyO3 conversion errors
+ logger.debug(f"Checkpoint metadata types: {[(k, type(v).__name__) for k, v in metadata.items()]}")
+
+ # Queue checkpoint via Rust FFI
+ self._rust_worker.queue_workflow_checkpoint(
+ invocation_id=request.invocation_id,
+ checkpoint_type=checkpoint["checkpoint_type"],
+ checkpoint_data=json.dumps(checkpoint["checkpoint_data"]),
+ sequence_number=checkpoint["sequence_number"],
+ metadata=metadata,
+ )
+ logger.debug(
+ f"Queued checkpoint: type={checkpoint['checkpoint_type']} "
+ f"seq={checkpoint['sequence_number']}"
+ )
+ except Exception as e:
+ logger.error(f"Failed to queue checkpoint: {e}", exc_info=True)
+ logger.error(f"Checkpoint metadata causing error: {metadata}")
+ logger.error(f"Checkpoint data: {checkpoint}")
+
+ # Create WorkflowContext with entity, runtime_context, and checkpoint callback
  ctx = WorkflowContext(
  workflow_entity=workflow_entity,
- run_id=f"{self.service_name}:{config.name}",
+ run_id=request.invocation_id, # Use unique invocation_id for this execution
+ session_id=session_id, # Session for multi-turn conversations
+ user_id=user_id, # User for long-term memory
  runtime_context=request.runtime_context,
+ checkpoint_callback=checkpoint_callback,
  )

+ # NEW: Populate agent resume info if this is an agent HITL resume
+ if agent_context and user_response:
+ ctx._agent_resume_info = {
+ "agent_name": agent_context["agent_name"],
+ "agent_context": agent_context,
+ "user_response": user_response,
+ }
+ logger.debug(
+ f"Set agent resume info for '{agent_context['agent_name']}' "
+ f"in workflow context"
+ )
+
  # Execute workflow directly - Rust bridge handles tracing
  # Note: Removed Python-level span creation to avoid duplicate spans.
  # The Rust worker bridge creates comprehensive OpenTelemetry spans.
  # See DUPLICATE_SPANS_FIX.md for details.
- if input_dict:
- result = await config.handler(ctx, **input_dict)
- else:
- result = await config.handler(ctx)
+
+ # CRITICAL: Set context in contextvar so LM/Agent/Tool calls can access it
+ from .context import set_current_context
+ token = set_current_context(ctx)
+ try:
+ if input_dict:
+ result = await config.handler(ctx, **input_dict)
+ else:
+ result = await config.handler(ctx)
+
+ # Note: Workflow entity persistence is handled by the @workflow decorator wrapper
+ # which persists before returning. No need to persist here.
+ finally:
+ # Always reset context to prevent leakage
+ from .context import _current_context
+ _current_context.reset(token)

  # Note: Removed flush_telemetry_py() call here - it was causing 2-second blocking delay!
  # The batch span processor handles flushing automatically with 5s timeout
@@ -847,6 +1048,11 @@ class Worker:
  # Collect workflow execution metadata for durability
  metadata = {}

+ # CRITICAL: Propagate tenant_id and deployment_id to prevent journal corruption
+ # Missing tenant_id causes events to be written to wrong partition
+ critical_metadata = self._extract_critical_metadata(request)
+ metadata.update(critical_metadata)
+
  # Add step events to metadata (for workflow durability)
  # Access _step_events from the workflow entity, not the context
  step_events = ctx._workflow_entity._step_events
@@ -862,11 +1068,41 @@ class Worker:
  metadata["workflow_state"] = json.dumps(state_snapshot)
  logger.debug(f"Workflow state snapshot: {state_snapshot}")

+ # AUDIT TRAIL: Serialize complete state change history for replay and debugging
+ # This captures all intermediate state mutations, not just final snapshot
+ state_changes = ctx._workflow_entity._state_changes
+ logger.info(f"🔍 DEBUG: _state_changes list has {len(state_changes)} entries")
+ if state_changes:
+ metadata["state_changes"] = json.dumps(state_changes)
+ logger.info(f"✅ Serialized {len(state_changes)} state changes to metadata")
+ else:
+ logger.warning("⚠️ _state_changes list is empty - no state change history captured")
+
+ # CRITICAL: Persist workflow entity state to platform
+ # This stores the WorkflowEntity as a first-class entity with proper versioning
+ try:
+ logger.info(f"🔍 DEBUG: About to call _persist_state() for run {request.invocation_id}")
+ await ctx._workflow_entity._persist_state()
+ logger.info(f"✅ Successfully persisted WorkflowEntity state for run {request.invocation_id}")
+ except Exception as persist_error:
+ logger.error(f"❌ Failed to persist WorkflowEntity state (non-fatal): {persist_error}", exc_info=True)
+ # Continue anyway - persistence failure shouldn't fail the workflow
+
  logger.info(f"Workflow completed successfully with {len(step_events)} steps")

  # Add session_id to metadata for multi-turn conversation support
  metadata["session_id"] = session_id

+ # CRITICAL: Flush all buffered checkpoints before returning response
+ # This ensures checkpoints arrive at platform BEFORE run.completed event
+ try:
+ flushed_count = self._rust_worker.flush_workflow_checkpoints()
+ if flushed_count > 0:
+ logger.info(f"✅ Flushed {flushed_count} checkpoints before completion")
+ except Exception as flush_error:
+ logger.error(f"Failed to flush checkpoints: {flush_error}", exc_info=True)
+ # Continue anyway - checkpoint flushing is best-effort
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=True,
@@ -877,11 +1113,13 @@ class Worker:
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  except WaitingForUserInputException as e:
- # Workflow paused for user input
- logger.info(f"⏸️ Workflow paused waiting for user input: {e.question}")
+ # Workflow or agent paused for user input
+ pause_type = "agent" if e.agent_context else "workflow"
+ logger.info(f"⏸️ {pause_type.capitalize()} paused waiting for user input: {e.question}")

  # Collect metadata for pause state
  # Note: All metadata values must be strings for Rust FFI
@@ -889,8 +1127,13 @@ class Worker:
  "status": "awaiting_user_input",
  "question": e.question,
  "input_type": e.input_type,
+ "pause_type": pause_type, # NEW: Indicates workflow vs agent pause
  }

+ # CRITICAL: Propagate tenant_id even when pausing
+ critical_metadata = self._extract_critical_metadata(request)
+ pause_metadata.update(critical_metadata)
+
  # Add optional fields only if they exist
  if e.options:
  pause_metadata["options"] = json.dumps(e.options)
@@ -899,6 +1142,14 @@ class Worker:
  if session_id:
  pause_metadata["session_id"] = session_id

+ # NEW: Store agent execution state if present
+ if e.agent_context:
+ pause_metadata["agent_context"] = json.dumps(e.agent_context)
+ logger.debug(
+ f"Agent '{e.agent_context['agent_name']}' paused at "
+ f"iteration {e.agent_context['iteration']}"
+ )
+
  # Add step events to pause metadata for durability
  step_events = ctx._workflow_entity._step_events
  if step_events:
@@ -912,6 +1163,12 @@ class Worker:
  pause_metadata["workflow_state"] = json.dumps(state_snapshot)
  logger.debug(f"Paused workflow state snapshot: {state_snapshot}")

+ # AUDIT TRAIL: Also include state change history for paused workflows
+ state_changes = ctx._workflow_entity._state_changes
+ if state_changes:
+ pause_metadata["state_changes"] = json.dumps(state_changes)
+ logger.debug(f"Paused workflow has {len(state_changes)} state changes in history")
+
  # Return "success" with awaiting_user_input metadata
  # The output contains the question details for the client
  output = {
@@ -931,22 +1188,45 @@ class Worker:
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  except Exception as e:
  # Include exception type for better error messages
  error_msg = f"{type(e).__name__}: {str(e)}"
+
+ # Capture full stack trace for telemetry
+ import traceback
+ stack_trace = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+
+ # Log with full traceback
  logger.error(f"Workflow execution failed: {error_msg}", exc_info=True)
+
+ # Store error metadata for observability
+ metadata = {
+ "error_type": type(e).__name__,
+ "stack_trace": stack_trace,
+ "error": True,
+ }
+
+ # Extract critical metadata for journal correlation (if available)
+ critical_metadata = self._extract_critical_metadata(request)
+ metadata.update(critical_metadata)
+
+ # Normalize metadata for Rust FFI compatibility
+ normalized_metadata = _normalize_metadata(metadata)
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=False,
  output_data=b"",
  state_update=None,
  error_message=error_msg,
- metadata=None,
+ metadata=normalized_metadata,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  async def _execute_tool(self, tool, input_data: bytes, request):
@@ -965,6 +1245,10 @@ class Worker:
  runtime_context=request.runtime_context,
  )

+ # Set context in contextvar so get_current_context() and error handlers can access it
+ from .context import set_current_context, _current_context
+ token = set_current_context(ctx)
+
  # Execute tool
  result = await tool.invoke(ctx, **input_dict)

@@ -981,24 +1265,54 @@ class Worker:
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  except Exception as e:
  # Include exception type for better error messages
  error_msg = f"{type(e).__name__}: {str(e)}"
- logger.error(f"Tool execution failed: {error_msg}", exc_info=True)
+
+ # Capture full stack trace for telemetry
+ import traceback
+ stack_trace = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+
+ # Log with full traceback using ctx.logger to ensure run_id correlation
+ from .context import get_current_context
+ current_ctx = get_current_context()
+ error_logger = current_ctx.logger if current_ctx else logger
+ error_logger.error(f"Tool execution failed: {error_msg}", exc_info=True)
+
+ # Store error metadata for observability
+ metadata = {
+ "error_type": type(e).__name__,
+ "stack_trace": stack_trace,
+ "error": True,
+ }
+
+ # CRITICAL: Extract critical metadata (including tenant_id) for journal event correlation
+ critical_metadata = self._extract_critical_metadata(request)
+ metadata.update(critical_metadata)
+
+ # Normalize metadata for Rust FFI compatibility
+ normalized_metadata = _normalize_metadata(metadata)
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=False,
  output_data=b"",
  state_update=None,
  error_message=error_msg,
- metadata=None,
+ metadata=normalized_metadata,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

+ finally:
+ # Always reset context to prevent leakage between executions
+ _current_context.reset(token)
+
  async def _execute_entity(self, entity_type, input_data: bytes, request):
  """Execute an entity method."""
  import json
@@ -1022,6 +1336,16 @@ class Worker:
  if not method_name:
  raise ValueError("Entity invocation requires 'method' parameter")

+ # Create context for logging and tracing
+ ctx = Context(
+ run_id=f"{self.service_name}:{entity_type.name}:{entity_key}",
+ runtime_context=request.runtime_context,
+ )
+
+ # Set context in contextvar so get_current_context() and error handlers can access it
+ from .context import set_current_context, _current_context
+ token = set_current_context(ctx)
+
  # Note: State loading is now handled automatically by the entity method wrapper
  # via EntityStateAdapter which uses the Rust core for cache + platform persistence

@@ -1042,7 +1366,9 @@ class Worker:

  # Note: State persistence is now handled automatically by the entity method wrapper
  # via EntityStateAdapter which uses Rust core for optimistic locking + version tracking
- metadata = {}
+
+ # CRITICAL: Propagate tenant_id and deployment_id to prevent journal corruption
+ metadata = self._extract_critical_metadata(request)

  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
@@ -1050,28 +1376,58 @@ class Worker:
  output_data=output_data,
  state_update=None, # TODO: Use structured StateUpdate object
  error_message=None,
- metadata=metadata, # Include state in metadata for Worker Coordinator
+ metadata=metadata if metadata else None, # Include state in metadata for Worker Coordinator
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  except Exception as e:
  # Include exception type for better error messages
  error_msg = f"{type(e).__name__}: {str(e)}"
- logger.error(f"Entity execution failed: {error_msg}", exc_info=True)
+
+ # Capture full stack trace for telemetry
+ import traceback
+ stack_trace = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+
+ # Log with full traceback using ctx.logger to ensure run_id correlation
+ from .context import get_current_context
+ current_ctx = get_current_context()
+ error_logger = current_ctx.logger if current_ctx else logger
+ error_logger.error(f"Entity execution failed: {error_msg}", exc_info=True)
+
+ # Store error metadata for observability
+ metadata = {
+ "error_type": type(e).__name__,
+ "stack_trace": stack_trace,
+ "error": True,
+ }
+
+ # Extract critical metadata for journal correlation (if available)
+ critical_metadata = self._extract_critical_metadata(request)
+ metadata.update(critical_metadata)
+
+ # Normalize metadata for Rust FFI compatibility
+ normalized_metadata = _normalize_metadata(metadata)
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=False,
  output_data=b"",
  state_update=None,
  error_message=error_msg,
- metadata=None,
+ metadata=normalized_metadata,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

+ finally:
+ # Always reset context to prevent leakage between executions
+ _current_context.reset(token)
+
  async def _execute_agent(self, agent, input_data: bytes, request):
  """Execute an agent with session support for multi-turn conversations."""
  import json
@@ -1112,6 +1468,10 @@ class Worker:
  runtime_context=request.runtime_context,
  )

+ # Set context in contextvar so get_current_context() and error handlers can access it
+ from .context import set_current_context, _current_context
+ token = set_current_context(ctx)
+
  # Execute agent - conversation history is automatically included
  agent_result = await agent.run(user_message, context=ctx)

@@ -1124,8 +1484,10 @@ class Worker:
  # Serialize result
  output_data = json.dumps(result).encode("utf-8")

- # Return session_id in metadata so UI can persist it
- metadata = {"session_id": session_id}
+ # CRITICAL: Propagate tenant_id and deployment_id to prevent journal corruption
+ metadata = self._extract_critical_metadata(request)
+ # Also include session_id for UI to persist conversation
+ metadata["session_id"] = session_id

  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
@@ -1133,28 +1495,58 @@ class Worker:
  output_data=output_data,
  state_update=None,
  error_message=None,
- metadata=metadata,
+ metadata=metadata if metadata else None,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  except Exception as e:
  # Include exception type for better error messages
  error_msg = f"{type(e).__name__}: {str(e)}"
- logger.error(f"Agent execution failed: {error_msg}", exc_info=True)
+
+ # Capture full stack trace for telemetry
+ import traceback
+ stack_trace = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+
+ # Log with full traceback using ctx.logger to ensure run_id correlation
+ from .context import get_current_context
+ current_ctx = get_current_context()
+ error_logger = current_ctx.logger if current_ctx else logger
+ error_logger.error(f"Agent execution failed: {error_msg}", exc_info=True)
+
+ # Store error metadata for observability
+ metadata = {
+ "error_type": type(e).__name__,
+ "stack_trace": stack_trace,
+ "error": True,
+ }
+
+ # Extract critical metadata for journal correlation (if available)
+ critical_metadata = self._extract_critical_metadata(request)
+ metadata.update(critical_metadata)
+
+ # Normalize metadata for Rust FFI compatibility
+ normalized_metadata = _normalize_metadata(metadata)
+
  return PyExecuteComponentResponse(
  invocation_id=request.invocation_id,
  success=False,
  output_data=b"",
  state_update=None,
  error_message=error_msg,
- metadata=None,
+ metadata=normalized_metadata,
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

+ finally:
+ # Always reset context to prevent leakage between executions
+ _current_context.reset(token)
+
  def _create_error_response(self, request, error_message: str):
  """Create an error response."""
  from ._core import PyExecuteComponentResponse
@@ -1169,6 +1561,7 @@ class Worker:
  is_chunk=False,
  done=True,
  chunk_index=0,
+ attempt=getattr(request, 'attempt', 0),
  )

  async def run(self):
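
For reference, a minimal sketch of how the newly added _normalize_metadata helper behaves before metadata crosses the PyO3 boundary, based on the docstring and implementation shown above. Illustrative only; it assumes the private helper remains importable from agnt5.worker in 0.2.8a8 and is not a supported public API.

    # Assumes agnt5 0.2.8a8 is installed; _normalize_metadata is a private module-level helper.
    from agnt5.worker import _normalize_metadata

    mixed = {"error": True, "count": 42, "note": None, "msg": "hello"}
    normalized = _normalize_metadata(mixed)

    # Booleans become lowercase strings, None becomes "", other values go through str():
    # {"error": "true", "count": "42", "note": "", "msg": "hello"}
    assert all(isinstance(v, str) for v in normalized.values())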