PyPI - agent-runtime-core - Versions diffs - 0.4.0__tar.gz → 0.5.1__tar.gz - Mend

agent-runtime-core: 0.4.0 (tar.gz) → 0.5.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{agent_runtime_core-0.4.0 → agent_runtime_core-0.5.1}/LICENSE RENAMED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2026 Chris Olstrom
+Copyright (c) 2026 Chris Barry
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

{agent_runtime_core-0.4.0 → agent_runtime_core-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.4
 Name: agent-runtime-core
-Version: 0.4.0
+Version: 0.5.1
 Summary: Framework-agnostic Python library for executing AI agents with consistent patterns
-Project-URL: Homepage, https://github.com/colstrom/agent_runtime_core
-Project-URL: Repository, https://github.com/colstrom/agent_runtime_core
-Author: Chris Olstrom
+Project-URL: Homepage, https://github.com/makemore/agent-runtime-core
+Project-URL: Repository, https://github.com/makemore/agent-runtime-core
+Author: Chris Barry
 License-Expression: MIT
 License-File: LICENSE
 Keywords: agents,ai,async,llm,runtime
@@ -720,6 +720,98 @@ result = await run_agent_test(MyAgent(), ctx)
 assert result.final_output["response"] == "Hi there!"
 ```
+## Step Executor
+The `StepExecutor` provides a structured way to execute multi-step operations with automatic checkpointing, resume capability, retries, and progress reporting. Ideal for long-running agent tasks.
+### Basic Usage
+```python
+from agent_runtime_core.steps import StepExecutor, Step
+class MyAgent(AgentRuntime):
+    async def run(self, ctx: RunContext) -> RunResult:
+        executor = StepExecutor(ctx)
+        results = await executor.run([
+            Step("fetch", self.fetch_data),
+            Step("process", self.process_data, retries=3),
+            Step("validate", self.validate_results),
+        ])
+        return RunResult(final_output=results)
+    async def fetch_data(self, ctx, state):
+        # Fetch data from external API
+        return {"items": [...]}
+    async def process_data(self, ctx, state):
+        # Access results from previous steps via state
+        return {"processed": True}
+    async def validate_results(self, ctx, state):
+        return {"valid": True}
+```
+### Step Options
+```python
+Step(
+    name="process",              # Unique step identifier
+    fn=process_data,             # Async function(ctx, state) -> result
+    retries=3,                   # Retry attempts on failure (default: 0)
+    retry_delay=2.0,             # Seconds between retries (default: 1.0)
+    timeout=30.0,                # Step timeout in seconds (optional)
+    description="Process data",  # Human-readable description
+    checkpoint=True,             # Save checkpoint after step (default: True)
+)
+```
+### Resume from Checkpoint
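+Each step is checkpointed when it completes (unless `checkpoint=False`). If execution is interrupted, calling `run()` again with the same steps skips the steps already recorded as completed and continues from the first unfinished one: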
+```python
+# First run - completes step1, fails during step2
+executor = StepExecutor(ctx)
+await executor.run([step1, step2, step3])  # Checkpoints after step1
+# Second run - skips step1, resumes from step2
+executor = StepExecutor(ctx)
+await executor.run([step1, step2, step3])  # step1 skipped
+```
+### Custom State
+Pass state between steps using `initial_state` and the `state` dict (`initial_state` is only applied when no checkpoint is being resumed):
+```python
+async def step1(ctx, state):
+    state["counter"] = 1
+    return "done"
+async def step2(ctx, state):
+    state["counter"] += 1  # Access state from step1
+    return state["counter"]
+executor = StepExecutor(ctx)
+results = await executor.run(
+    [Step("step1", step1), Step("step2", step2)],
+    initial_state={"counter": 0},
+)
+```
+### Events
+The executor emits events for observability:
+- `EventType.STEP_STARTED` - Step execution began
+- `EventType.STEP_COMPLETED` - Step completed successfully
+- `EventType.STEP_FAILED` - Step failed after all retries
+- `EventType.STEP_RETRYING` - Step is being retried
+- `EventType.STEP_SKIPPED` - Step skipped (already completed)
+- `EventType.PROGRESS_UPDATE` - Progress percentage update
 ## API Reference
 ### Configuration

{agent_runtime_core-0.4.0 → agent_runtime_core-0.5.1}/README.md RENAMED Viewed

@@ -677,6 +677,98 @@ result = await run_agent_test(MyAgent(), ctx)
 assert result.final_output["response"] == "Hi there!"
 ```
+## Step Executor
+The `StepExecutor` provides a structured way to execute multi-step operations with automatic checkpointing, resume capability, retries, and progress reporting. Ideal for long-running agent tasks.
+### Basic Usage
+```python
+from agent_runtime_core.steps import StepExecutor, Step
+class MyAgent(AgentRuntime):
+    async def run(self, ctx: RunContext) -> RunResult:
+        executor = StepExecutor(ctx)
+        results = await executor.run([
+            Step("fetch", self.fetch_data),
+            Step("process", self.process_data, retries=3),
+            Step("validate", self.validate_results),
+        ])
+        return RunResult(final_output=results)
+    async def fetch_data(self, ctx, state):
+        # Fetch data from external API
+        return {"items": [...]}
+    async def process_data(self, ctx, state):
+        # Access results from previous steps via state
+        return {"processed": True}
+    async def validate_results(self, ctx, state):
+        return {"valid": True}
+```
+### Step Options
+```python
+Step(
+    name="process",              # Unique step identifier
+    fn=process_data,             # Async function(ctx, state) -> result
+    retries=3,                   # Retry attempts on failure (default: 0)
+    retry_delay=2.0,             # Seconds between retries (default: 1.0)
+    timeout=30.0,                # Step timeout in seconds (optional)
+    description="Process data",  # Human-readable description
+    checkpoint=True,             # Save checkpoint after step (default: True)
+)
+```
+### Resume from Checkpoint
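+Each step is checkpointed when it completes (unless `checkpoint=False`). If execution is interrupted, calling `run()` again with the same steps skips the steps already recorded as completed and continues from the first unfinished one: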
+```python
+# First run - completes step1, fails during step2
+executor = StepExecutor(ctx)
+await executor.run([step1, step2, step3])  # Checkpoints after step1
+# Second run - skips step1, resumes from step2
+executor = StepExecutor(ctx)
+await executor.run([step1, step2, step3])  # step1 skipped
+```
+### Custom State
+Pass state between steps using `initial_state` and the `state` dict (`initial_state` is only applied when no checkpoint is being resumed):
+```python
+async def step1(ctx, state):
+    state["counter"] = 1
+    return "done"
+async def step2(ctx, state):
+    state["counter"] += 1  # Access state from step1
+    return state["counter"]
+executor = StepExecutor(ctx)
+results = await executor.run(
+    [Step("step1", step1), Step("step2", step2)],
+    initial_state={"counter": 0},
+)
+```
+### Events
+The executor emits events for observability:
+- `EventType.STEP_STARTED` - Step execution began
+- `EventType.STEP_COMPLETED` - Step completed successfully
+- `EventType.STEP_FAILED` - Step failed after all retries
+- `EventType.STEP_RETRYING` - Step is being retried
+- `EventType.STEP_SKIPPED` - Step skipped (already completed)
+- `EventType.PROGRESS_UPDATE` - Progress percentage update
 ## API Reference
 ### Configuration

{agent_runtime_core-0.4.0 → agent_runtime_core-0.5.1}/agent_runtime_core/__init__.py RENAMED Viewed

@@ -34,7 +34,7 @@ Example usage:
             return RunResult(final_output={"message": "Hello!"})
 """
-__version__ = "0.3.0"
+__version__ = "0.5.1"
 # Core interfaces
 from agent_runtime_core.interfaces import (
@@ -76,6 +76,16 @@ from agent_runtime_core.runner import (
     RunContextImpl,
 )
+# Step execution for long-running multi-step agents
+from agent_runtime_core.steps import (
+    Step,
+    StepExecutor,
+    StepResult,
+    StepStatus,
+    ExecutionState,
+    StepExecutionError,
+    StepCancelledError,
+)
 # Testing utilities
 from agent_runtime_core.testing import (
@@ -146,6 +156,14 @@ __all__ = [
     "AgentRunner",
     "RunnerConfig",
     "RunContextImpl",
+    # Step execution
+    "Step",
+    "StepExecutor",
+    "StepResult",
+    "StepStatus",
+    "ExecutionState",
+    "StepExecutionError",
+    "StepCancelledError",
     # Testing
     "MockRunContext",
     "MockLLMClient",

{agent_runtime_core-0.4.0 → agent_runtime_core-0.5.1}/agent_runtime_core/interfaces.py RENAMED Viewed

@@ -41,6 +41,14 @@ class EventType(str, Enum):
     # State events
     STATE_CHECKPOINT = "state.checkpoint"
+    # Step execution events (for long-running multi-step agents)
+    STEP_STARTED = "step.started"
+    STEP_COMPLETED = "step.completed"
+    STEP_FAILED = "step.failed"
+    STEP_SKIPPED = "step.skipped"  # When resuming from checkpoint
+    STEP_RETRYING = "step.retrying"
+    PROGRESS_UPDATE = "progress.update"  # General progress reporting
 class Message(TypedDict, total=False):
     """

agent_runtime_core-0.5.1/agent_runtime_core/steps.py ADDED Viewed

@@ -0,0 +1,373 @@
+"""
+Step executor for long-running multi-step agent operations.
+This module provides a structured way to execute multi-step operations
+with automatic checkpointing, resume capability, retries, and progress
+reporting.
+Example usage:
+    from agent_runtime_core.steps import StepExecutor, Step
+    class MyAgent(AgentRuntime):
+        async def run(self, ctx: RunContext) -> RunResult:
+            executor = StepExecutor(ctx)
+            result = await executor.run([
+                Step("fetch", self.fetch_data),
+                Step("process", self.process_data, retries=3),
+                Step("validate", self.validate),
+            ])
+            return RunResult(final_output=result)
+"""
+import asyncio
+import traceback
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Awaitable, Callable, Optional, TypeVar, Union
+from uuid import UUID, uuid4
+from agent_runtime_core.interfaces import EventType, RunContext
+class StepStatus(str, Enum):
+    """
+    Status of a step execution.
+    Members subclass `str`, so each one compares equal to its plain string
+    value (for example ``StepStatus.COMPLETED == "completed"``).
+    """
+    PENDING = "pending"
+    RUNNING = "running"
+    COMPLETED = "completed"  # set on the StepResult returned for a successful step
+    FAILED = "failed"
+    SKIPPED = "skipped"
+    CANCELLED = "cancelled"
+# Type for step functions: async def step_fn(ctx, state) -> result
+# The executor awaits `fn(ctx, state)`, passing its shared custom-state dict as
+# `state`, and records whatever the call returns as the step's result.
+StepFunction = Callable[[RunContext, dict], Awaitable[Any]]
+@dataclass
+class Step:
+    """
+    Definition of a single step in a multi-step operation.
+    Args:
+        name: Unique identifier for this step. The executor records finished
+            steps by name and skips any step whose name is already recorded.
+        fn: Async function to execute. Receives (ctx, state) and returns result.
+        retries: Number of retry attempts on failure (default: 0). The step is
+            attempted at most ``retries + 1`` times in total.
+        retry_delay: Seconds to wait between retries (default: 1.0)
+        timeout: Optional timeout in seconds for this step. A falsy value
+            (``None`` or ``0``) makes the executor run the step with no timeout.
+        description: Human-readable description for progress reporting
+        checkpoint: Whether to checkpoint after this step (default: True)
+    """
+    name: str
+    fn: StepFunction
+    retries: int = 0
+    retry_delay: float = 1.0
+    timeout: Optional[float] = None
+    description: Optional[str] = None
+    checkpoint: bool = True
+@dataclass
+class StepResult:
+    """
+    Result of executing a single step.
+    The executor builds one of these for every step that finishes
+    successfully and stores its `result` field under the step's name.
+    """
+    name: str
+    status: StepStatus
+    result: Any = None  # value returned by the step function
+    error: Optional[str] = None
+    attempts: int = 1  # attempts made, counting the one that produced this result
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+    duration_ms: Optional[float] = None  # completed_at - started_at, in milliseconds
+@dataclass
+class ExecutionState:
+    """
+    State of a multi-step execution.
+    This is what gets checkpointed and can be used to resume.
+    Attributes:
+        execution_id: Identifier generated once for each fresh execution.
+        current_step_index: Index of the step most recently started.
+        completed_steps: Names of the steps that have finished, in order.
+        step_results: Mapping of step name to that step's return value.
+        started_at: Creation time of this state (naive UTC datetime).
+        custom_state: Free-form dict handed to every step function.
+    """
+    execution_id: UUID = field(default_factory=uuid4)
+    current_step_index: int = 0
+    completed_steps: list[str] = field(default_factory=list)
+    step_results: dict[str, Any] = field(default_factory=dict)
+    started_at: datetime = field(default_factory=datetime.utcnow)
+    custom_state: dict = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        """
+        Convert to a dictionary for checkpointing.
+        The list and dicts are shallow-copied. Without the copies, a
+        checkpoint store that keeps the returned object by reference would
+        be altered whenever this state is mutated afterwards, for example
+        by a later step declared with ``checkpoint=False``.
+        Returns:
+            A new dict holding the id and timestamp as strings plus copies
+            of the step bookkeeping and custom state.
+        """
+        return {
+            "execution_id": str(self.execution_id),
+            "current_step_index": self.current_step_index,
+            "completed_steps": list(self.completed_steps),
+            "step_results": dict(self.step_results),
+            "started_at": self.started_at.isoformat(),
+            "custom_state": dict(self.custom_state),
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "ExecutionState":
+        """
+        Restore from a checkpointed dictionary.
+        Containers are copied so that mutating the restored state never
+        writes through to the checkpoint object it was loaded from.
+        Args:
+            data: A dict in the shape produced by `to_dict`. The
+                ``custom_state`` key is optional.
+        Raises:
+            KeyError: If a required key is missing.
+            ValueError: If the id or timestamp string cannot be parsed.
+        """
+        return cls(
+            execution_id=UUID(data["execution_id"]),
+            current_step_index=data["current_step_index"],
+            completed_steps=list(data["completed_steps"]),
+            step_results=dict(data["step_results"]),
+            started_at=datetime.fromisoformat(data["started_at"]),
+            custom_state=dict(data.get("custom_state") or {}),
+        )
+class StepExecutionError(Exception):
+    """Signals that a step kept failing until its attempts were used up."""
+    def __init__(self, step_name: str, message: str, attempts: int):
+        # Build the human-readable message first, then attach the structured
+        # fields callers can inspect programmatically.
+        detail = f"Step '{step_name}' failed after {attempts} attempts: {message}"
+        super().__init__(detail)
+        self.step_name = step_name
+        self.attempts = attempts
+class StepCancelledError(Exception):
+    """Signals that the execution was cancelled; carries the affected step's name."""
+    def __init__(self, step_name: str):
+        detail = f"Execution cancelled during step '{step_name}'"
+        super().__init__(detail)
+        self.step_name = step_name
+class StepExecutor:
+    """
+    Executes a sequence of steps with checkpointing and resume capability.
+    Features:
+    - Checkpointing after each step that has ``checkpoint=True``
+    - Resume from last checkpoint on restart
+    - Per-step retries with configurable delay
+    - Progress reporting via events
+    - Cancellation check before each step
+    - Step-level timeouts
+    Example:
+        executor = StepExecutor(ctx)
+        result = await executor.run([
+            Step("fetch", fetch_data),
+            Step("process", process_data, retries=3),
+            Step("save", save_results),
+        ])
+    """
+    def __init__(
+        self,
+        ctx: RunContext,
+        *,
+        checkpoint_key: str = "_step_executor_state",
+        cancel_check_interval: float = 0.5,
+    ):
+        """
+        Initialize the step executor.
+        Args:
+            ctx: The run context from the agent runtime
+            checkpoint_key: Key used for storing execution state
+            cancel_check_interval: How often to check for cancellation (seconds).
+                Stored on the instance; nothing in this class reads it yet.
+        """
+        self.ctx = ctx
+        self.checkpoint_key = checkpoint_key
+        self.cancel_check_interval = cancel_check_interval
+        self._state: Optional[ExecutionState] = None
+    async def run(
+        self,
+        steps: list[Step],
+        *,
+        initial_state: Optional[dict] = None,
+        resume: bool = True,
+    ) -> dict[str, Any]:
+        """
+        Execute a sequence of steps.
+        Args:
+            steps: List of steps to execute
+            initial_state: Optional initial custom state. Only used when a
+                fresh execution starts; ignored when a checkpoint is resumed.
+            resume: Whether to resume from checkpoint if available. With
+                ``False`` the run always starts from a fresh state.
+        Returns:
+            Dictionary mapping step names to their results
+        Raises:
+            StepExecutionError: If a step fails after all retries
+            StepCancelledError: If execution is cancelled
+        """
+        # Always rebuild the state for this call. Reusing whatever a previous
+        # run() left on the instance would make resume=False skip every step.
+        state = await self._load_state() if resume else None
+        if state is None:
+            state = ExecutionState(custom_state=initial_state or {})
+        self._state = state
+        total_steps = len(steps)
+        for i, step in enumerate(steps):
+            # Skip already completed steps
+            if step.name in state.completed_steps:
+                await self.ctx.emit(EventType.STEP_SKIPPED, {
+                    "step_name": step.name,
+                    "step_index": i,
+                    "total_steps": total_steps,
+                    "reason": "already_completed",
+                })
+                continue
+            # Check for cancellation
+            if self.ctx.cancelled():
+                raise StepCancelledError(step.name)
+            # Update state
+            state.current_step_index = i
+            # Execute the step
+            outcome = await self._execute_step(step, i, total_steps)
+            # Record completion
+            state.completed_steps.append(step.name)
+            state.step_results[step.name] = outcome.result
+            # Checkpoint if enabled
+            if step.checkpoint:
+                await self._save_state()
+        return state.step_results
+    async def _execute_step(
+        self,
+        step: Step,
+        index: int,
+        total: int,
+    ) -> StepResult:
+        """
+        Execute a single step with retries.
+        Args:
+            step: The step to run.
+            index: Zero-based position of the step in the list given to run().
+            total: Number of steps in that list.
+        Returns:
+            A StepResult with status COMPLETED for the successful attempt.
+        Raises:
+            StepCancelledError: If the awaiting task is cancelled mid-step.
+            StepExecutionError: If every attempt fails. The last underlying
+                exception is attached as ``__cause__``.
+        """
+        max_attempts = step.retries + 1
+        attempts = 0
+        last_error: Optional[str] = None
+        last_exc: Optional[BaseException] = None
+        while attempts <= step.retries:
+            attempts += 1
+            # Emit started event
+            await self.ctx.emit(EventType.STEP_STARTED, {
+                "step_name": step.name,
+                "step_index": index,
+                "total_steps": total,
+                "attempt": attempts,
+                "max_attempts": max_attempts,
+                "description": step.description,
+            })
+            # Emit progress (share of steps finished before this one)
+            await self.ctx.emit(EventType.PROGRESS_UPDATE, {
+                "step_name": step.name,
+                "step_index": index,
+                "total_steps": total,
+                "progress_percent": (index / total) * 100,
+                "description": step.description or f"Executing {step.name}",
+            })
+            started_at = datetime.utcnow()
+            try:
+                # Execute with optional timeout
+                if step.timeout:
+                    result = await asyncio.wait_for(
+                        step.fn(self.ctx, self._state.custom_state),
+                        timeout=step.timeout,
+                    )
+                else:
+                    result = await step.fn(self.ctx, self._state.custom_state)
+                completed_at = datetime.utcnow()
+                duration_ms = (completed_at - started_at).total_seconds() * 1000
+                # Emit completed event
+                await self.ctx.emit(EventType.STEP_COMPLETED, {
+                    "step_name": step.name,
+                    "step_index": index,
+                    "total_steps": total,
+                    "attempt": attempts,
+                    "duration_ms": duration_ms,
+                })
+                return StepResult(
+                    name=step.name,
+                    status=StepStatus.COMPLETED,
+                    result=result,
+                    attempts=attempts,
+                    started_at=started_at,
+                    completed_at=completed_at,
+                    duration_ms=duration_ms,
+                )
+            except asyncio.CancelledError:
+                # NOTE(review): this turns task cancellation into an ordinary
+                # exception, so the enclosing task is no longer seen as
+                # cancelled by asyncio. Kept because callers catch this type.
+                raise StepCancelledError(step.name)
+            except asyncio.TimeoutError as exc:
+                last_exc = exc
+                last_error = f"Step timed out after {step.timeout}s"
+            except Exception as exc:
+                last_exc = exc
+                last_error = f"{type(exc).__name__}: {str(exc)}"
+            # Check if we should retry
+            if attempts <= step.retries:
+                await self.ctx.emit(EventType.STEP_RETRYING, {
+                    "step_name": step.name,
+                    "step_index": index,
+                    "attempt": attempts,
+                    "max_attempts": max_attempts,
+                    "error": last_error,
+                    "retry_delay": step.retry_delay,
+                })
+                await asyncio.sleep(step.retry_delay)
+        # All retries exhausted
+        await self.ctx.emit(EventType.STEP_FAILED, {
+            "step_name": step.name,
+            "step_index": index,
+            "total_steps": total,
+            "attempts": attempts,
+            "error": last_error,
+        })
+        # Chain the real failure so its traceback is not lost to callers.
+        raise StepExecutionError(
+            step.name, last_error or "Unknown error", attempts
+        ) from last_exc
+    async def _load_state(self) -> Optional[ExecutionState]:
+        """
+        Load execution state from checkpoint.
+        Returns:
+            The restored state, or None when the context holds no entry under
+            `checkpoint_key` or the stored entry cannot be decoded.
+        """
+        checkpoint = await self.ctx.get_state()
+        if not checkpoint or self.checkpoint_key not in checkpoint:
+            return None
+        try:
+            return ExecutionState.from_dict(checkpoint[self.checkpoint_key])
+        except (KeyError, ValueError, TypeError, AttributeError):
+            # A malformed entry (missing keys, wrong types, unparsable id or
+            # timestamp) is treated as "no checkpoint": start fresh.
+            return None
+    async def _save_state(self) -> None:
+        """Save execution state to checkpoint, keeping other keys of the stored state."""
+        checkpoint = await self.ctx.get_state() or {}
+        checkpoint[self.checkpoint_key] = self._state.to_dict()
+        await self.ctx.checkpoint(checkpoint)
+    @property
+    def state(self) -> Optional[ExecutionState]:
+        """Get the current execution state (None until one has been created or loaded)."""
+        return self._state
+    def update_custom_state(self, updates: dict) -> None:
+        """
+        Update custom state (will be checkpointed with next step).
+        Does nothing when no execution state exists yet.
+        """
+        if self._state:
+            self._state.custom_state.update(updates)

{agent_runtime_core-0.4.0 → agent_runtime_core-0.5.1}/pyproject.toml RENAMED Viewed

@@ -4,13 +4,13 @@ build-backend = "hatchling.build"
 [project]
 name = "agent-runtime-core"
-version = "0.4.0"
+version = "0.5.1"
 description = "Framework-agnostic Python library for executing AI agents with consistent patterns"
 readme = "README.md"
 license = "MIT"
 requires-python = ">=3.11"
 authors = [
-    { name = "Chris Olstrom" }
+    { name = "Chris Barry" }
 ]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -48,8 +48,8 @@ dev = [
 ]
 [project.urls]
-Homepage = "https://github.com/colstrom/agent_runtime_core"
-Repository = "https://github.com/colstrom/agent_runtime_core"
+Homepage = "https://github.com/makemore/agent-runtime-core"
+Repository = "https://github.com/makemore/agent-runtime-core"
 [tool.hatch.build.targets.wheel]
 packages = ["agent_runtime_core"]

agent_runtime_core-0.5.1/tests/test_steps.py ADDED Viewed

@@ -0,0 +1,365 @@
+"""Tests for step execution module."""
+import pytest
+import asyncio
+from datetime import datetime
+from uuid import uuid4
+from agent_runtime_core.steps import (
+    Step,
+    StepExecutor,
+    StepResult,
+    StepStatus,
+    ExecutionState,
+    StepExecutionError,
+    StepCancelledError,
+)
+from agent_runtime_core.interfaces import EventType
+from agent_runtime_core.testing import MockRunContext
+class TestStep:
+    """Checks the Step dataclass: its defaults and explicit overrides."""
+    def test_step_defaults(self):
+        """A Step built from only a name and a function uses the documented defaults."""
+        async def noop(ctx, state):
+            return "result"
+        created = Step("test", noop)
+        # The two required fields are stored as given.
+        assert created.name == "test"
+        assert created.fn == noop
+        # Everything else falls back to its default.
+        assert created.retries == 0
+        assert created.retry_delay == 1.0
+        assert created.timeout is None
+        assert created.description is None
+        assert created.checkpoint is True
+    def test_step_with_options(self):
+        """Every optional field can be overridden by keyword."""
+        async def noop(ctx, state):
+            return "result"
+        options = {
+            "retries": 3,
+            "retry_delay": 2.0,
+            "timeout": 30.0,
+            "description": "Fetch data from API",
+            "checkpoint": False,
+        }
+        created = Step(name="fetch", fn=noop, **options)
+        assert created.retries == 3
+        assert created.retry_delay == 2.0
+        assert created.timeout == 30.0
+        assert created.description == "Fetch data from API"
+        assert created.checkpoint is False
+class TestExecutionState:
+    """Tests for ExecutionState."""
+    def test_state_defaults(self):
+        """Test ExecutionState with defaults."""
+        state = ExecutionState()
+        assert state.current_step_index == 0
+        assert state.completed_steps == []
+        assert state.step_results == {}
+        assert state.custom_state == {}
+    def test_state_serialization(self):
+        """Test that to_dict followed by from_dict restores every field."""
+        state = ExecutionState(
+            current_step_index=2,
+            completed_steps=["step1", "step2"],
+            step_results={"step1": "result1", "step2": "result2"},
+            custom_state={"key": "value"},
+        )
+        data = state.to_dict()
+        restored = ExecutionState.from_dict(data)
+        assert restored.current_step_index == 2
+        assert restored.completed_steps == ["step1", "step2"]
+        assert restored.step_results == {"step1": "result1", "step2": "result2"}
+        assert restored.custom_state == {"key": "value"}
+        # The id and timestamp are the only fields converted to strings and
+        # back, so they are the ones most likely to break in a round trip.
+        assert isinstance(data["execution_id"], str)
+        assert isinstance(data["started_at"], str)
+        assert restored.execution_id == state.execution_id
+        assert restored.started_at == state.started_at
+class TestStepExecutor:
+    """Tests for StepExecutor."""
+    @pytest.mark.asyncio
+    async def test_execute_single_step(self):
+        """Test executing a single step."""
+        ctx = MockRunContext()
+        async def my_step(ctx, state):
+            return {"data": "hello"}
+        executor = StepExecutor(ctx)
+        results = await executor.run([Step("my_step", my_step)])
+        assert results == {"my_step": {"data": "hello"}}
+    @pytest.mark.asyncio
+    async def test_execute_multiple_steps(self):
+        """Test executing multiple steps in sequence."""
+        ctx = MockRunContext()
+        call_order = []
+        async def step1(ctx, state):
+            call_order.append("step1")
+            return "result1"
+        async def step2(ctx, state):
+            call_order.append("step2")
+            return "result2"
+        async def step3(ctx, state):
+            call_order.append("step3")
+            return "result3"
+        executor = StepExecutor(ctx)
+        results = await executor.run([
+            Step("step1", step1),
+            Step("step2", step2),
+            Step("step3", step3),
+        ])
+        assert call_order == ["step1", "step2", "step3"]
+        assert results == {
+            "step1": "result1",
+            "step2": "result2",
+            "step3": "result3",
+        }
+    @pytest.mark.asyncio
+    async def test_step_receives_custom_state(self):
+        """Test that steps receive and can modify custom state."""
+        ctx = MockRunContext()
+        async def step1(ctx, state):
+            state["counter"] = 1
+            return "done"
+        async def step2(ctx, state):
+            state["counter"] += 1
+            return state["counter"]
+        executor = StepExecutor(ctx)
+        results = await executor.run(
+            [Step("step1", step1), Step("step2", step2)],
+            initial_state={"counter": 0},
+        )
+        assert results["step2"] == 2
+    @pytest.mark.asyncio
+    async def test_step_emits_events(self):
+        """Test that step execution emits proper events."""
+        ctx = MockRunContext()
+        async def my_step(ctx, state):
+            return "done"
+        executor = StepExecutor(ctx)
+        await executor.run([Step("my_step", my_step, description="Test step")])
+        events = ctx.get_events()
+        event_types = [event_type for event_type, _ in events]
+        assert EventType.STEP_STARTED.value in event_types
+        assert EventType.STEP_COMPLETED.value in event_types
+        assert EventType.PROGRESS_UPDATE.value in event_types
+    @pytest.mark.asyncio
+    async def test_step_retry_on_failure(self):
+        """Test that steps retry on failure."""
+        ctx = MockRunContext()
+        attempts = []
+        async def flaky_step(ctx, state):
+            attempts.append(1)
+            if len(attempts) < 3:
+                raise ValueError("Temporary failure")
+            return "success"
+        executor = StepExecutor(ctx)
+        results = await executor.run([
+            Step("flaky", flaky_step, retries=3, retry_delay=0.01)
+        ])
+        assert len(attempts) == 3
+        assert results["flaky"] == "success"
+        # Check retry events
+        events = ctx.get_events()
+        retry_events = [
+            (event_type, payload)
+            for event_type, payload in events
+            if event_type == EventType.STEP_RETRYING.value
+        ]
+        assert len(retry_events) == 2  # 2 retries before success
+    @pytest.mark.asyncio
+    async def test_step_fails_after_max_retries(self):
+        """Test that step fails after exhausting retries."""
+        ctx = MockRunContext()
+        async def always_fails(ctx, state):
+            raise ValueError("Always fails")
+        executor = StepExecutor(ctx)
+        with pytest.raises(StepExecutionError) as exc_info:
+            await executor.run([
+                Step("failing", always_fails, retries=2, retry_delay=0.01)
+            ])
+        assert exc_info.value.step_name == "failing"
+        assert exc_info.value.attempts == 3  # 1 initial + 2 retries
+    @pytest.mark.asyncio
+    async def test_step_timeout(self):
+        """Test step timeout handling."""
+        ctx = MockRunContext()
+        async def slow_step(ctx, state):
+            await asyncio.sleep(10)
+            return "done"
+        executor = StepExecutor(ctx)
+        with pytest.raises(StepExecutionError) as exc_info:
+            await executor.run([
+                Step("slow", slow_step, timeout=0.1)
+            ])
+        assert "timed out" in str(exc_info.value)
+    @pytest.mark.asyncio
+    async def test_cancellation(self):
+        """Test that cancelling during one step stops the next step from running."""
+        ctx = MockRunContext()
+        ran = []
+        async def step1(ctx, state):
+            ran.append("step1")
+            ctx.cancel()  # Cancel during first step
+            return "done"
+        async def step2(ctx, state):
+            ran.append("step2")
+            return "should not run"
+        executor = StepExecutor(ctx)
+        with pytest.raises(StepCancelledError) as exc_info:
+            await executor.run([
+                Step("step1", step1),
+                Step("step2", step2),
+            ])
+        # The error names the step that was about to start, and that step's
+        # function must never have been invoked.
+        assert exc_info.value.step_name == "step2"
+        assert ran == ["step1"]
+    @pytest.mark.asyncio
+    async def test_checkpoint_and_resume(self):
+        """Test checkpointing and resuming execution."""
+        ctx = MockRunContext()
+        call_order = []
+        async def step1(ctx, state):
+            call_order.append("step1")
+            return "result1"
+        async def step2(ctx, state):
+            call_order.append("step2")
+            return "result2"
+        # First run - complete step1
+        executor1 = StepExecutor(ctx)
+        await executor1.run([Step("step1", step1)])
+        # Second run - should skip step1, run step2
+        executor2 = StepExecutor(ctx)
+        results = await executor2.run([
+            Step("step1", step1),
+            Step("step2", step2),
+        ])
+        # step1 should only be called once (first run)
+        # step2 should be called in second run
+        assert call_order == ["step1", "step2"]
+        assert results["step2"] == "result2"
+        # Check skip event
+        events = ctx.get_events()
+        skip_events = [
+            (event_type, payload)
+            for event_type, payload in events
+            if event_type == EventType.STEP_SKIPPED.value
+        ]
+        assert len(skip_events) == 1
+        _, payload = skip_events[0]
+        assert payload["step_name"] == "step1"
+    @pytest.mark.asyncio
+    async def test_resume_disabled(self):
+        """Test that resume can be disabled."""
+        ctx = MockRunContext()
+        call_count = 0
+        async def my_step(ctx, state):
+            nonlocal call_count
+            call_count += 1
+            return "done"
+        # First run
+        executor1 = StepExecutor(ctx)
+        await executor1.run([Step("my_step", my_step)])
+        # Second run with resume=False
+        executor2 = StepExecutor(ctx)
+        await executor2.run([Step("my_step", my_step)], resume=False)
+        # Step should be called twice
+        assert call_count == 2
+    @pytest.mark.asyncio
+    async def test_update_custom_state(self):
+        """Test updating custom state via executor."""
+        ctx = MockRunContext()
+        executor = StepExecutor(ctx)
+        # Seed a state directly; update_custom_state is a no-op without one.
+        executor._state = ExecutionState(custom_state={"value": 42})
+        executor.update_custom_state({"value": 100})
+        assert executor.state.custom_state["value"] == 100
+    @pytest.mark.asyncio
+    async def test_progress_percentage(self):
+        """Test progress percentage calculation."""
+        ctx = MockRunContext()
+        async def dummy(ctx, state):
+            return "done"
+        executor = StepExecutor(ctx)
+        await executor.run([
+            Step("step1", dummy),
+            Step("step2", dummy),
+            Step("step3", dummy),
+            Step("step4", dummy),
+        ])
+        events = ctx.get_events()
+        progress_events = [
+            (event_type, payload)
+            for event_type, payload in events
+            if event_type == EventType.PROGRESS_UPDATE.value
+        ]
+        # Progress is reported when a step starts, so it reflects the steps
+        # finished before it and never reaches 100 here.
+        percentages = [payload["progress_percent"] for _, payload in progress_events]
+        assert percentages == [0.0, 25.0, 50.0, 75.0]