agent-runtime-core 0.1.1.tar.gz → 0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/.gitignore +1 -0
  2. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/PKG-INFO +1 -1
  3. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/__init__.py +19 -1
  4. agent_runtime_core-0.1.2/agent_runtime/testing.py +358 -0
  5. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/pyproject.toml +1 -1
  6. agent_runtime_core-0.1.2/tests/test_testing.py +266 -0
  7. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/LICENSE +0 -0
  8. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/README.md +0 -0
  9. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/config.py +0 -0
  10. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/__init__.py +0 -0
  11. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/base.py +0 -0
  12. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/memory.py +0 -0
  13. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/redis.py +0 -0
  14. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/sqlite.py +0 -0
  15. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/interfaces.py +0 -0
  16. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/__init__.py +0 -0
  17. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/anthropic.py +0 -0
  18. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/litellm_client.py +0 -0
  19. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/openai.py +0 -0
  20. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/__init__.py +0 -0
  21. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/base.py +0 -0
  22. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/memory.py +0 -0
  23. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/redis.py +0 -0
  24. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/sqlite.py +0 -0
  25. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/registry.py +0 -0
  26. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/runner.py +0 -0
  27. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/__init__.py +0 -0
  28. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/base.py +0 -0
  29. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/memory.py +0 -0
  30. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/redis.py +0 -0
  31. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/sqlite.py +0 -0
  32. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/tracing/__init__.py +0 -0
  33. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/tracing/langfuse.py +0 -0
  34. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/tracing/noop.py +0 -0
  35. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/__init__.py +0 -0
  36. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_events.py +0 -0
  37. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_imports.py +0 -0
  38. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_queue.py +0 -0
  39. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_state.py +0 -0
.gitignore
@@ -147,3 +147,4 @@ cython_debug/
  # OS
  .DS_Store
  Thumbs.db
+ .pypirc
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: agent-runtime-core
- Version: 0.1.1
+ Version: 0.1.2
  Summary: Framework-agnostic Python library for executing AI agents with consistent patterns
  Project-URL: Homepage, https://github.com/colstrom/agent_runtime
  Project-URL: Repository, https://github.com/colstrom/agent_runtime
agent_runtime/__init__.py
@@ -34,7 +34,7 @@ Example usage:
          return RunResult(final_output={"message": "Hello!"})
  """

- __version__ = "0.1.1"
+ __version__ = "0.1.2"

  # Core interfaces
  from agent_runtime.interfaces import (
@@ -76,6 +76,17 @@ from agent_runtime.runner import (
      RunContextImpl,
  )

+
+ # Testing utilities
+ from agent_runtime.testing import (
+     MockRunContext,
+     MockLLMClient,
+     MockLLMResponse,
+     LLMEvaluator,
+     create_test_context,
+     run_agent_test,
+ )
+
  __all__ = [
      # Version
      "__version__",
@@ -107,4 +118,11 @@ __all__ = [
      "AgentRunner",
      "RunnerConfig",
      "RunContextImpl",
+     # Testing
+     "MockRunContext",
+     "MockLLMClient",
+     "MockLLMResponse",
+     "LLMEvaluator",
+     "create_test_context",
+     "run_agent_test",
  ]
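
With these re-exports in place, test code can pull the mocks straight from the package root. A minimal sketch of what that enables (assuming pytest with pytest-asyncio, which this diff does not add as a dependency):

import pytest

from agent_runtime import EventType, MockRunContext


@pytest.mark.asyncio
async def test_mock_context_records_events():
    # MockRunContext records emitted events instead of publishing them anywhere.
    ctx = MockRunContext(input_messages=[{"role": "user", "content": "Hello"}])
    await ctx.emit(EventType.RUN_STARTED, {"agent": "example"})

    assert ctx.get_events()[0][0] == "run.started"
    assert ctx.cancelled() is False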
agent_runtime/testing.py (new file)
@@ -0,0 +1,358 @@
+ """
+ Testing utilities for agent runtimes.
+
+ This module provides tools for testing agent implementations:
+ - MockRunContext: A concrete RunContext for unit tests
+ - MockLLMClient: A mock LLM client with predefined responses
+ - LLMEvaluator: Use an LLM to evaluate agent responses
+ - create_test_context / run_agent_test: Convenience helpers for tests
+
+ Example usage:
+     from agent_runtime.testing import run_agent_test
+
+     async def test_agent_responds():
+         result, ctx = await run_agent_test(my_agent, "Hello, agent!")
+         assert "response" in result.final_output
+ """
+
+ import asyncio
+ from dataclasses import dataclass, field
+ from typing import Any, Callable, Optional, AsyncIterator
+ from uuid import UUID, uuid4
+ import json
+
+ from .interfaces import (
+     AgentRuntime,
+     EventType,
+     LLMClient,
+     LLMResponse,
+     LLMStreamChunk,
+     Message,
+     RunContext,
+     RunResult,
+     Tool,
+     ToolRegistry,
+ )
+
+
+ @dataclass
+ class MockRunContext:
+     """
+     A concrete implementation of RunContext for testing.
+
+     Use this in unit tests to provide a context to your agent
+     without needing the full runtime infrastructure.
+
+     Example:
+         ctx = MockRunContext(
+             input_messages=[{"role": "user", "content": "Hello"}],
+             metadata={"user_id": "123"}
+         )
+         result = await my_agent.run(ctx)
+     """
+
+     input_messages: list[Message] = field(default_factory=list)
+     params: dict = field(default_factory=dict)
+     metadata: dict = field(default_factory=dict)
+     run_id: UUID = field(default_factory=uuid4)
+     conversation_id: Optional[UUID] = None
+     tool_registry: ToolRegistry = field(default_factory=ToolRegistry)
+
+     # Internal state
+     _events: list[tuple[str, dict]] = field(default_factory=list)
+     _checkpoints: list[dict] = field(default_factory=list)
+     _cancelled: bool = False
+
+     async def emit(self, event_type: EventType | str, payload: dict) -> None:
+         """Record emitted events for later inspection."""
+         event_name = event_type.value if isinstance(event_type, EventType) else event_type
+         self._events.append((event_name, payload))
+
+     async def checkpoint(self, state: dict) -> None:
+         """Save a checkpoint."""
+         self._checkpoints.append(state)
+
+     async def get_state(self) -> Optional[dict]:
+         """Get the last checkpoint."""
+         return self._checkpoints[-1] if self._checkpoints else None
+
+     def cancelled(self) -> bool:
+         """Check if cancelled."""
+         return self._cancelled
+
+     def cancel(self) -> None:
+         """Request cancellation."""
+         self._cancelled = True
+
+     # Test helpers
+     def get_events(self, event_type: Optional[str] = None) -> list[tuple[str, dict]]:
+         """Get recorded events, optionally filtered by type."""
+         if event_type is None:
+             return self._events
+         return [(t, p) for t, p in self._events if t == event_type]
+
+     def get_checkpoints(self) -> list[dict]:
+         """Get all checkpoints."""
+         return self._checkpoints
+
+     def clear(self) -> None:
+         """Clear recorded events and checkpoints."""
+         self._events.clear()
+         self._checkpoints.clear()
+         self._cancelled = False
+
+
+ @dataclass
+ class MockLLMResponse:
+     """A predefined response for MockLLMClient."""
+     content: str
+     tool_calls: Optional[list[dict]] = None
+     finish_reason: str = "stop"
+
+
+ class MockLLMClient(LLMClient):
+     """
+     A mock LLM client for testing.
+
+     Configure with predefined responses or a response function.
+
+     Example:
+         # Simple predefined responses
+         client = MockLLMClient(responses=[
+             MockLLMResponse(content="Hello!"),
+             MockLLMResponse(content="How can I help?"),
+         ])
+
+         # Dynamic responses based on input
+         def respond(messages):
+             if "weather" in messages[-1]["content"].lower():
+                 return MockLLMResponse(content="It's sunny!")
+             return MockLLMResponse(content="I don't know.")
+
+         client = MockLLMClient(response_fn=respond)
+     """
+
+     def __init__(
+         self,
+         responses: Optional[list[MockLLMResponse]] = None,
+         response_fn: Optional[Callable[[list[Message]], MockLLMResponse]] = None,
+         default_response: str = "Mock response",
+     ):
+         self._responses = responses or []
+         self._response_fn = response_fn
+         self._default_response = default_response
+         self._call_count = 0
+         self._calls: list[dict] = []
+
+     async def generate(
+         self,
+         messages: list[Message],
+         *,
+         model: Optional[str] = None,
+         stream: bool = False,
+         tools: Optional[list[dict]] = None,
+         temperature: Optional[float] = None,
+         max_tokens: Optional[int] = None,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate a mock response."""
+         # Record the call
+         self._calls.append({
+             "messages": messages,
+             "model": model,
+             "tools": tools,
+             "kwargs": kwargs,
+         })
+
+         # Get response
+         if self._response_fn:
+             mock_resp = self._response_fn(messages)
+         elif self._call_count < len(self._responses):
+             mock_resp = self._responses[self._call_count]
+         else:
+             mock_resp = MockLLMResponse(content=self._default_response)
+
+         self._call_count += 1
+
+         # Build message
+         message: Message = {
+             "role": "assistant",
+             "content": mock_resp.content,
+         }
+         if mock_resp.tool_calls:
+             message["tool_calls"] = mock_resp.tool_calls
+
+         return LLMResponse(
+             message=message,
+             model=model or "mock-model",
+             finish_reason=mock_resp.finish_reason,
+             usage={"prompt_tokens": 10, "completion_tokens": 20},
+         )
+
+     async def stream(
+         self,
+         messages: list[Message],
+         *,
+         model: Optional[str] = None,
+         tools: Optional[list[dict]] = None,
+         **kwargs,
+     ) -> AsyncIterator[LLMStreamChunk]:
+         """Stream a mock response (yields content in chunks)."""
+         response = await self.generate(messages, model=model, tools=tools, **kwargs)
+         content = response.message.get("content", "")
+
+         # Yield content in chunks
+         for i in range(0, len(content), 10):
+             yield LLMStreamChunk(delta=content[i:i+10])
+
+         yield LLMStreamChunk(finish_reason="stop", usage=response.usage)
+
+     # Test helpers
+     def get_calls(self) -> list[dict]:
+         """Get all recorded calls."""
+         return self._calls
+
+     def get_call_count(self) -> int:
+         """Get the number of calls made."""
+         return self._call_count
+
+     def reset(self) -> None:
+         """Reset call tracking."""
+         self._call_count = 0
+         self._calls.clear()
+
+
+ class LLMEvaluator:
+     """
+     Use an LLM to evaluate agent responses.
+
+     This is useful for testing that agent responses meet certain criteria
+     without having to write brittle string-matching tests.
+
+     Example:
+         evaluator = LLMEvaluator(openai_client)
+
+         passed, explanation = await evaluator.evaluate(
+             user_query="What's the weather?",
+             agent_response="It's currently 72°F and sunny in San Francisco.",
+             criteria="The response should include temperature and weather conditions"
+         )
+
+         assert passed, f"Evaluation failed: {explanation}"
+     """
+
+     def __init__(self, llm_client: LLMClient, model: str = "gpt-4o-mini"):
+         self._client = llm_client
+         self._model = model
+
+     async def evaluate(
+         self,
+         user_query: str,
+         agent_response: str,
+         criteria: str,
+     ) -> tuple[bool, str]:
+         """
+         Evaluate an agent response against criteria.
+
+         Args:
+             user_query: The original user query
+             agent_response: The agent's response
+             criteria: What the response should satisfy
+
+         Returns:
+             Tuple of (passed: bool, explanation: str)
+         """
+         eval_prompt = f"""You are evaluating an AI assistant's response.
+
+ User Query: {user_query}
+
+ Agent Response: {agent_response}
+
+ Evaluation Criteria: {criteria}
+
+ Does the response meet the criteria? Answer with just "PASS" or "FAIL" followed by a brief explanation."""
+
+         response = await self._client.generate(
+             messages=[{"role": "user", "content": eval_prompt}],
+             model=self._model,
+             temperature=0,
+         )
+
+         result = response.message.get("content", "FAIL Unknown error")
+         passed = result.strip().upper().startswith("PASS")
+         return passed, result
+
+     async def evaluate_tool_usage(
+         self,
+         user_query: str,
+         tool_calls: list[dict],
+         expected_tools: list[str],
+     ) -> tuple[bool, str]:
+         """
+         Evaluate whether the agent used the expected tools.
+
+         Args:
+             user_query: The original user query
+             tool_calls: List of tool calls made by the agent
+             expected_tools: List of tool names that should have been called
+
+         Returns:
+             Tuple of (passed: bool, explanation: str)
+         """
+         tool_names = [tc.get("function", {}).get("name", tc.get("name", "unknown"))
+                       for tc in tool_calls]
+
+         missing = set(expected_tools) - set(tool_names)
+         if missing:
+             return False, f"Missing expected tools: {missing}. Called: {tool_names}"
+
+         return True, f"All expected tools were called: {tool_names}"
+
+
+ def create_test_context(
+     message: str,
+     *,
+     tools: Optional[list[Tool]] = None,
+     metadata: Optional[dict] = None,
+     params: Optional[dict] = None,
+ ) -> MockRunContext:
+     """
+     Convenience function to create a test context.
+
+     Example:
+         ctx = create_test_context("Hello, agent!", tools=[my_tool])
+         result = await agent.run(ctx)
+     """
+     registry = ToolRegistry()
+     if tools:
+         for tool in tools:
+             registry.register(tool)
+
+     return MockRunContext(
+         input_messages=[{"role": "user", "content": message}],
+         tool_registry=registry,
+         metadata=metadata or {},
+         params=params or {},
+     )
+
+
+ async def run_agent_test(
+     agent: AgentRuntime,
+     message: str,
+     *,
+     tools: Optional[list[Tool]] = None,
+     metadata: Optional[dict] = None,
+ ) -> tuple[RunResult, MockRunContext]:
+     """
+     Run an agent with a test message and return both result and context.
+
+     Example:
+         result, ctx = await run_agent_test(my_agent, "Hello!")
+         assert "greeting" in result.final_output
+         assert len(ctx.get_events()) > 0
+     """
+     ctx = create_test_context(message, tools=tools, metadata=metadata)
+     result = await agent.run(ctx)
+     return result, ctx
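
Nothing in testing.py wires an LLM into the agent for you; the intended pattern appears to be injecting a MockLLMClient into the agent under test and driving it with run_agent_test. A short sketch under that assumption (LLMEchoAgent is a hypothetical example agent, not part of the package; pytest-asyncio assumed for the async test):

import pytest

from agent_runtime import AgentRuntime, RunContext, RunResult
from agent_runtime.testing import MockLLMClient, MockLLMResponse, run_agent_test


class LLMEchoAgent(AgentRuntime):
    """Minimal example agent that forwards the conversation to an injected LLM client."""

    def __init__(self, llm: MockLLMClient):
        self._llm = llm

    @property
    def key(self) -> str:
        return "llm-echo-agent"

    async def run(self, ctx: RunContext) -> RunResult:
        # The mock returns whatever scripted response is queued next.
        response = await self._llm.generate(ctx.input_messages)
        return RunResult(final_output={"reply": response.message["content"]})


@pytest.mark.asyncio
async def test_agent_returns_scripted_reply():
    llm = MockLLMClient(responses=[MockLLMResponse(content="Scripted reply")])
    result, ctx = await run_agent_test(LLMEchoAgent(llm), "Hello!")

    assert result.final_output["reply"] == "Scripted reply"
    assert llm.get_call_count() == 1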
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "agent-runtime-core"
- version = "0.1.1"
+ version = "0.1.2"
  description = "Framework-agnostic Python library for executing AI agents with consistent patterns"
  readme = "README.md"
  license = "MIT"
tests/test_testing.py (new file)
@@ -0,0 +1,266 @@
+ """Tests for the testing utilities module."""
+
+ import pytest
+ from uuid import UUID
+
+ from agent_runtime import (
+     AgentRuntime,
+     EventType,
+     RunContext,
+     RunResult,
+     Tool,
+     ToolRegistry,
+ )
+ from agent_runtime.testing import (
+     MockRunContext,
+     MockLLMClient,
+     MockLLMResponse,
+     create_test_context,
+     run_agent_test,
+ )
+
+
+ class TestMockRunContext:
+     """Tests for MockRunContext."""
+
+     def test_default_values(self):
+         """Test that MockRunContext has sensible defaults."""
+         ctx = MockRunContext()
+
+         assert isinstance(ctx.run_id, UUID)
+         assert ctx.conversation_id is None
+         assert ctx.input_messages == []
+         assert ctx.params == {}
+         assert ctx.metadata == {}
+         assert isinstance(ctx.tool_registry, ToolRegistry)
+         assert ctx.cancelled() is False
+
+     def test_custom_values(self):
+         """Test MockRunContext with custom values."""
+         ctx = MockRunContext(
+             input_messages=[{"role": "user", "content": "Hello"}],
+             params={"temperature": 0.7},
+             metadata={"user_id": "123"},
+         )
+
+         assert len(ctx.input_messages) == 1
+         assert ctx.input_messages[0]["content"] == "Hello"
+         assert ctx.params["temperature"] == 0.7
+         assert ctx.metadata["user_id"] == "123"
+
+     @pytest.mark.asyncio
+     async def test_emit_events(self):
+         """Test event emission and retrieval."""
+         ctx = MockRunContext()
+
+         await ctx.emit(EventType.RUN_STARTED, {"agent": "test"})
+         await ctx.emit(EventType.TOOL_CALL, {"tool": "search"})
+         await ctx.emit(EventType.RUN_SUCCEEDED, {"result": "done"})
+
+         # Get all events
+         events = ctx.get_events()
+         assert len(events) == 3
+
+         # Filter by type
+         tool_events = ctx.get_events("tool.call")
+         assert len(tool_events) == 1
+         assert tool_events[0][1]["tool"] == "search"
+
+     @pytest.mark.asyncio
+     async def test_checkpoints(self):
+         """Test checkpoint save and retrieval."""
+         ctx = MockRunContext()
+
+         # No checkpoint initially
+         state = await ctx.get_state()
+         assert state is None
+
+         # Save checkpoints
+         await ctx.checkpoint({"step": 1})
+         await ctx.checkpoint({"step": 2})
+
+         # Get latest
+         state = await ctx.get_state()
+         assert state["step"] == 2
+
+         # Get all
+         checkpoints = ctx.get_checkpoints()
+         assert len(checkpoints) == 2
+
+     def test_cancellation(self):
+         """Test cancellation flag."""
+         ctx = MockRunContext()
+
+         assert ctx.cancelled() is False
+         ctx.cancel()
+         assert ctx.cancelled() is True
+
+     @pytest.mark.asyncio
+     async def test_clear(self):
+         """Test clearing recorded data."""
+         ctx = MockRunContext()
+
+         await ctx.emit(EventType.RUN_STARTED, {})
+         await ctx.checkpoint({"step": 1})
+         ctx.cancel()
+
+         ctx.clear()
+
+         assert ctx.get_events() == []
+         assert ctx.get_checkpoints() == []
+         assert ctx.cancelled() is False
+
+
+ class TestMockLLMClient:
+     """Tests for MockLLMClient."""
+
+     @pytest.mark.asyncio
+     async def test_default_response(self):
+         """Test default response when no responses configured."""
+         client = MockLLMClient()
+
+         response = await client.generate([{"role": "user", "content": "Hi"}])
+
+         assert response.message["role"] == "assistant"
+         assert response.message["content"] == "Mock response"
+         assert response.model == "mock-model"
+
+     @pytest.mark.asyncio
+     async def test_predefined_responses(self):
+         """Test cycling through predefined responses."""
+         client = MockLLMClient(responses=[
+             MockLLMResponse(content="First"),
+             MockLLMResponse(content="Second"),
+         ])
+
+         r1 = await client.generate([{"role": "user", "content": "1"}])
+         r2 = await client.generate([{"role": "user", "content": "2"}])
+         r3 = await client.generate([{"role": "user", "content": "3"}])
+
+         assert r1.message["content"] == "First"
+         assert r2.message["content"] == "Second"
+         assert r3.message["content"] == "Mock response"  # Falls back to default
+
+     @pytest.mark.asyncio
+     async def test_response_function(self):
+         """Test dynamic response function."""
+         def respond(messages):
+             content = messages[-1].get("content", "")
+             if "weather" in content.lower():
+                 return MockLLMResponse(content="It's sunny!")
+             return MockLLMResponse(content="I don't know.")
+
+         client = MockLLMClient(response_fn=respond)
+
+         r1 = await client.generate([{"role": "user", "content": "What's the weather?"}])
+         r2 = await client.generate([{"role": "user", "content": "Hello"}])
+
+         assert r1.message["content"] == "It's sunny!"
+         assert r2.message["content"] == "I don't know."
+
+     @pytest.mark.asyncio
+     async def test_tool_calls(self):
+         """Test responses with tool calls."""
+         client = MockLLMClient(responses=[
+             MockLLMResponse(
+                 content="",
+                 tool_calls=[{
+                     "id": "call_1",
+                     "type": "function",
+                     "function": {"name": "search", "arguments": '{"q": "test"}'}
+                 }]
+             )
+         ])
+
+         response = await client.generate([{"role": "user", "content": "Search"}])
+
+         assert response.message["tool_calls"] is not None
+         assert len(response.message["tool_calls"]) == 1
+         assert response.message["tool_calls"][0]["function"]["name"] == "search"
+
+     @pytest.mark.asyncio
+     async def test_call_tracking(self):
+         """Test that calls are recorded."""
+         client = MockLLMClient()
+
+         await client.generate(
+             [{"role": "user", "content": "Hi"}],
+             model="gpt-4",
+             temperature=0.5,
+         )
+         await client.generate([{"role": "user", "content": "Bye"}])
+
+         assert client.get_call_count() == 2
+
+         calls = client.get_calls()
+         assert calls[0]["model"] == "gpt-4"
+         assert calls[1]["messages"][0]["content"] == "Bye"
+
+         client.reset()
+         assert client.get_call_count() == 0
+
+     @pytest.mark.asyncio
+     async def test_streaming(self):
+         """Test streaming responses."""
+         client = MockLLMClient(responses=[
+             MockLLMResponse(content="Hello, world!")
+         ])
+
+         chunks = []
+         async for chunk in client.stream([{"role": "user", "content": "Hi"}]):
+             chunks.append(chunk)
+
+         # Should have content chunks plus final chunk
+         assert len(chunks) >= 2
+         assert chunks[-1].finish_reason == "stop"
+
+         # Reconstruct content
+         content = "".join(c.delta for c in chunks)
+         assert content == "Hello, world!"
+
+
+ class TestHelperFunctions:
+     """Tests for helper functions."""
+
+     def test_create_test_context(self):
+         """Test create_test_context helper."""
+         tool = Tool(
+             name="test_tool",
+             description="A test tool",
+             parameters={"type": "object", "properties": {}},
+             handler=lambda: "result",
+         )
+
+         ctx = create_test_context(
+             "Hello, agent!",
+             tools=[tool],
+             metadata={"user": "test"},
+             params={"mode": "test"},
+         )
+
+         assert ctx.input_messages[0]["content"] == "Hello, agent!"
+         assert ctx.tool_registry.get("test_tool") is not None
+         assert ctx.metadata["user"] == "test"
+         assert ctx.params["mode"] == "test"
+
+     @pytest.mark.asyncio
+     async def test_run_agent_test(self):
+         """Test run_agent_test helper."""
+
+         class TestAgent(AgentRuntime):
+             @property
+             def key(self) -> str:
+                 return "test-agent"
+
+             async def run(self, ctx: RunContext) -> RunResult:
+                 await ctx.emit(EventType.RUN_STARTED, {})
+                 return RunResult(
+                     final_output={"echo": ctx.input_messages[0]["content"]}
+                 )
+
+         agent = TestAgent()
+         result, ctx = await run_agent_test(agent, "Hello!")
+
+         assert result.final_output["echo"] == "Hello!"
+         assert len(ctx.get_events()) == 1
+         assert ctx.get_events()[0][0] == "run.started"
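
The new test suite does not cover LLMEvaluator. Since evaluate() only checks whether the judge's reply starts with "PASS", it can be exercised offline by scripting the judge with MockLLMClient; a sketch of that approach (pytest-asyncio assumed):

import pytest

from agent_runtime.testing import LLMEvaluator, MockLLMClient, MockLLMResponse


@pytest.mark.asyncio
async def test_evaluator_with_scripted_judge():
    # The judge is a MockLLMClient, so no real model call is made.
    judge = MockLLMClient(responses=[MockLLMResponse(content="PASS Mentions the temperature.")])
    evaluator = LLMEvaluator(judge)

    passed, explanation = await evaluator.evaluate(
        user_query="What's the weather?",
        agent_response="It's 72°F and sunny.",
        criteria="The response should include a temperature",
    )

    assert passed is True
    assert explanation.startswith("PASS")


@pytest.mark.asyncio
async def test_evaluator_checks_expected_tools():
    # evaluate_tool_usage is pure set logic; the client is never called here.
    evaluator = LLMEvaluator(MockLLMClient())

    passed, explanation = await evaluator.evaluate_tool_usage(
        user_query="Search for cats",
        tool_calls=[{"function": {"name": "search", "arguments": "{}"}}],
        expected_tools=["search"],
    )

    assert passed is True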