agent-runtime-core 0.1.1.tar.gz → 0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/.gitignore +1 -0
  2. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/PKG-INFO +1 -1
  3. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/__init__.py +19 -1
  4. agent_runtime_core-0.1.2/agent_runtime/testing.py +358 -0
  5. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/pyproject.toml +1 -1
  6. agent_runtime_core-0.1.2/tests/test_testing.py +266 -0
  7. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/LICENSE +0 -0
  8. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/README.md +0 -0
  9. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/config.py +0 -0
  10. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/__init__.py +0 -0
  11. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/base.py +0 -0
  12. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/memory.py +0 -0
  13. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/redis.py +0 -0
  14. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/events/sqlite.py +0 -0
  15. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/interfaces.py +0 -0
  16. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/__init__.py +0 -0
  17. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/anthropic.py +0 -0
  18. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/litellm_client.py +0 -0
  19. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/llm/openai.py +0 -0
  20. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/__init__.py +0 -0
  21. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/base.py +0 -0
  22. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/memory.py +0 -0
  23. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/redis.py +0 -0
  24. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/queue/sqlite.py +0 -0
  25. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/registry.py +0 -0
  26. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/runner.py +0 -0
  27. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/__init__.py +0 -0
  28. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/base.py +0 -0
  29. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/memory.py +0 -0
  30. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/redis.py +0 -0
  31. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/state/sqlite.py +0 -0
  32. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/tracing/__init__.py +0 -0
  33. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/tracing/langfuse.py +0 -0
  34. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/agent_runtime/tracing/noop.py +0 -0
  35. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/__init__.py +0 -0
  36. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_events.py +0 -0
  37. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_imports.py +0 -0
  38. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_queue.py +0 -0
  39. {agent_runtime_core-0.1.1 → agent_runtime_core-0.1.2}/tests/test_state.py +0 -0
.gitignore
@@ -147,3 +147,4 @@ cython_debug/
  # OS
  .DS_Store
  Thumbs.db
+ .pypirc
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: agent-runtime-core
- Version: 0.1.1
+ Version: 0.1.2
  Summary: Framework-agnostic Python library for executing AI agents with consistent patterns
  Project-URL: Homepage, https://github.com/colstrom/agent_runtime
  Project-URL: Repository, https://github.com/colstrom/agent_runtime
agent_runtime/__init__.py
@@ -34,7 +34,7 @@ Example usage:
          return RunResult(final_output={"message": "Hello!"})
  """

- __version__ = "0.1.1"
+ __version__ = "0.1.2"

  # Core interfaces
  from agent_runtime.interfaces import (
@@ -76,6 +76,17 @@ from agent_runtime.runner import (
      RunContextImpl,
  )

+
+ # Testing utilities
+ from agent_runtime.testing import (
+     MockRunContext,
+     MockLLMClient,
+     MockLLMResponse,
+     LLMEvaluator,
+     create_test_context,
+     run_agent_test,
+ )
+
  __all__ = [
      # Version
      "__version__",
@@ -107,4 +118,11 @@ __all__ = [
      "AgentRunner",
      "RunnerConfig",
      "RunContextImpl",
+     # Testing
+     "MockRunContext",
+     "MockLLMClient",
+     "MockLLMResponse",
+     "LLMEvaluator",
+     "create_test_context",
+     "run_agent_test",
  ]
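
With these re-exports in place, test code can pull the mocks straight from the package root. A minimal sketch of what that enables (assuming pytest with pytest-asyncio, which this diff does not add as a dependency):

import pytest

from agent_runtime import EventType, MockRunContext


@pytest.mark.asyncio
async def test_mock_context_records_events():
    # MockRunContext records emitted events instead of publishing them anywhere.
    ctx = MockRunContext(input_messages=[{"role": "user", "content": "Hello"}])
    await ctx.emit(EventType.RUN_STARTED, {"agent": "example"})

    assert ctx.get_events()[0][0] == "run.started"
    assert ctx.cancelled() is False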
agent_runtime/testing.py (new file)
@@ -0,0 +1,358 @@
+ """
+ Testing utilities for agent runtimes.
+
+ This module provides tools for testing agent implementations:
+ - MockRunContext: A concrete RunContext for unit tests
+ - MockLLMClient: A mock LLM client with predefined responses
+ - LLMEvaluator: Use an LLM to evaluate agent responses
+ - create_test_context / run_agent_test: Convenience helpers for tests
+
+ Example usage:
+     from agent_runtime.testing import run_agent_test
+
+     async def test_agent_responds():
+         result, ctx = await run_agent_test(my_agent, "Hello, agent!")
+         assert "response" in result.final_output
+ """
+
+ import asyncio
+ from dataclasses import dataclass, field
+ from typing import Any, Callable, Optional, AsyncIterator
+ from uuid import UUID, uuid4
+ import json
+
+ from .interfaces import (
+     AgentRuntime,
+     EventType,
+     LLMClient,
+     LLMResponse,
+     LLMStreamChunk,
+     Message,
+     RunContext,
+     RunResult,
+     Tool,
+     ToolRegistry,
+ )
+
+
+ @dataclass
+ class MockRunContext:
+     """
+     A concrete implementation of RunContext for testing.
+
+     Use this in unit tests to provide a context to your agent
+     without needing the full runtime infrastructure.
+
+     Example:
+         ctx = MockRunContext(
+             input_messages=[{"role": "user", "content": "Hello"}],
+             metadata={"user_id": "123"}
+         )
+         result = await my_agent.run(ctx)
+     """
+
+     input_messages: list[Message] = field(default_factory=list)
+     params: dict = field(default_factory=dict)
+     metadata: dict = field(default_factory=dict)
+     run_id: UUID = field(default_factory=uuid4)
+     conversation_id: Optional[UUID] = None
+     tool_registry: ToolRegistry = field(default_factory=ToolRegistry)
+
+     # Internal state
+     _events: list[tuple[str, dict]] = field(default_factory=list)
+     _checkpoints: list[dict] = field(default_factory=list)
+     _cancelled: bool = False
+
+     async def emit(self, event_type: EventType | str, payload: dict) -> None:
+         """Record emitted events for later inspection."""
+         event_name = event_type.value if isinstance(event_type, EventType) else event_type
+         self._events.append((event_name, payload))
+
+     async def checkpoint(self, state: dict) -> None:
+         """Save a checkpoint."""
+         self._checkpoints.append(state)
+
+     async def get_state(self) -> Optional[dict]:
+         """Get the last checkpoint."""
+         return self._checkpoints[-1] if self._checkpoints else None
+
+     def cancelled(self) -> bool:
+         """Check if cancelled."""
+         return self._cancelled
+
+     def cancel(self) -> None:
+         """Request cancellation."""
+         self._cancelled = True
+
+     # Test helpers
+     def get_events(self, event_type: Optional[str] = None) -> list[tuple[str, dict]]:
+         """Get recorded events, optionally filtered by type."""
+         if event_type is None:
+             return self._events
+         return [(t, p) for t, p in self._events if t == event_type]
+
+     def get_checkpoints(self) -> list[dict]:
+         """Get all checkpoints."""
+         return self._checkpoints
+
+     def clear(self) -> None:
+         """Clear recorded events and checkpoints."""
+         self._events.clear()
+         self._checkpoints.clear()
+         self._cancelled = False
+
+
+ @dataclass
+ class MockLLMResponse:
+     """A predefined response for MockLLMClient."""
+     content: str
+     tool_calls: Optional[list[dict]] = None
+     finish_reason: str = "stop"
+
+
+ class MockLLMClient(LLMClient):
+     """
+     A mock LLM client for testing.
+
+     Configure with predefined responses or a response function.
+
+     Example:
+         # Simple predefined responses
+         client = MockLLMClient(responses=[
+             MockLLMResponse(content="Hello!"),
+             MockLLMResponse(content="How can I help?"),
+         ])
+
+         # Dynamic responses based on input
+         def respond(messages):
+             if "weather" in messages[-1]["content"].lower():
+                 return MockLLMResponse(content="It's sunny!")
+             return MockLLMResponse(content="I don't know.")
+
+         client = MockLLMClient(response_fn=respond)
+     """
+
+     def __init__(
+         self,
+         responses: Optional[list[MockLLMResponse]] = None,
+         response_fn: Optional[Callable[[list[Message]], MockLLMResponse]] = None,
+         default_response: str = "Mock response",
+     ):
+         self._responses = responses or []
+         self._response_fn = response_fn
+         self._default_response = default_response
+         self._call_count = 0
+         self._calls: list[dict] = []
+
+     async def generate(
+         self,
+         messages: list[Message],
+         *,
+         model: Optional[str] = None,
+         stream: bool = False,
+         tools: Optional[list[dict]] = None,
+         temperature: Optional[float] = None,
+         max_tokens: Optional[int] = None,
+         **kwargs,
+     ) -> LLMResponse:
+         """Generate a mock response."""
+         # Record the call
+         self._calls.append({
+             "messages": messages,
+             "model": model,
+             "tools": tools,
+             "kwargs": kwargs,
+         })
+
+         # Get response
+         if self._response_fn:
+             mock_resp = self._response_fn(messages)
+         elif self._call_count < len(self._responses):
+             mock_resp = self._responses[self._call_count]
+         else:
+             mock_resp = MockLLMResponse(content=self._default_response)
+
+         self._call_count += 1
+
+         # Build message
+         message: Message = {
+             "role": "assistant",
+             "content": mock_resp.content,
+         }
+         if mock_resp.tool_calls:
+             message["tool_calls"] = mock_resp.tool_calls
+
+         return LLMResponse(
+             message=message,
+             model=model or "mock-model",
+             finish_reason=mock_resp.finish_reason,
+             usage={"prompt_tokens": 10, "completion_tokens": 20},
+         )
+
+     async def stream(
+         self,
+         messages: list[Message],
+         *,
+         model: Optional[str] = None,
+         tools: Optional[list[dict]] = None,
+         **kwargs,
+     ) -> AsyncIterator[LLMStreamChunk]:
+         """Stream a mock response (yields content in chunks)."""
+         response = await self.generate(messages, model=model, tools=tools, **kwargs)
+         content = response.message.get("content", "")
+
+         # Yield content in chunks
+         for i in range(0, len(content), 10):
+             yield LLMStreamChunk(delta=content[i:i+10])
+
+         yield LLMStreamChunk(finish_reason="stop", usage=response.usage)
+
+     # Test helpers
+     def get_calls(self) -> list[dict]:
+         """Get all recorded calls."""
+         return self._calls
+
+     def get_call_count(self) -> int:
+         """Get the number of calls made."""
+         return self._call_count
+
+     def reset(self) -> None:
+         """Reset call tracking."""
+         self._call_count = 0
+         self._calls.clear()
+
+
+ class LLMEvaluator:
+     """
+     Use an LLM to evaluate agent responses.
+
+     This is useful for testing that agent responses meet certain criteria
+     without having to write brittle string-matching tests.
+
+     Example:
+         evaluator = LLMEvaluator(openai_client)
+
+         passed, explanation = await evaluator.evaluate(
+             user_query="What's the weather?",
+             agent_response="It's currently 72°F and sunny in San Francisco.",
+             criteria="The response should include temperature and weather conditions"
+         )
+
+         assert passed, f"Evaluation failed: {explanation}"
+     """
+
+     def __init__(self, llm_client: LLMClient, model: str = "gpt-4o-mini"):
+         self._client = llm_client
+         self._model = model
+
+     async def evaluate(
+         self,
+         user_query: str,
+         agent_response: str,
+         criteria: str,
+     ) -> tuple[bool, str]:
+         """
+         Evaluate an agent response against criteria.
+
+         Args:
+             user_query: The original user query
+             agent_response: The agent's response
+             criteria: What the response should satisfy
+
+         Returns:
+             Tuple of (passed: bool, explanation: str)
+         """
+         eval_prompt = f"""You are evaluating an AI assistant's response.
+
+ User Query: {user_query}
+
+ Agent Response: {agent_response}
+
+ Evaluation Criteria: {criteria}
+
+ Does the response meet the criteria? Answer with just "PASS" or "FAIL" followed by a brief explanation."""
+
+         response = await self._client.generate(
+             messages=[{"role": "user", "content": eval_prompt}],
+             model=self._model,
+             temperature=0,
+         )
+
+         result = response.message.get("content", "FAIL Unknown error")
+         passed = result.strip().upper().startswith("PASS")
+         return passed, result
+
+     async def evaluate_tool_usage(
+         self,
+         user_query: str,
+         tool_calls: list[dict],
+         expected_tools: list[str],
+     ) -> tuple[bool, str]:
+         """
+         Evaluate whether the agent used the expected tools.
+
+         Args:
+             user_query: The original user query
+             tool_calls: List of tool calls made by the agent
+             expected_tools: List of tool names that should have been called
+
+         Returns:
+             Tuple of (passed: bool, explanation: str)
+         """
+         tool_names = [tc.get("function", {}).get("name", tc.get("name", "unknown"))
+                       for tc in tool_calls]
+
+         missing = set(expected_tools) - set(tool_names)
+         if missing:
+             return False, f"Missing expected tools: {missing}. Called: {tool_names}"
+
+         return True, f"All expected tools were called: {tool_names}"
+
+
+ def create_test_context(
+     message: str,
+     *,
+     tools: Optional[list[Tool]] = None,
+     metadata: Optional[dict] = None,
+     params: Optional[dict] = None,
+ ) -> MockRunContext:
+     """
+     Convenience function to create a test context.
+
+     Example:
+         ctx = create_test_context("Hello, agent!", tools=[my_tool])
+         result = await agent.run(ctx)
+     """
+     registry = ToolRegistry()
+     if tools:
+         for tool in tools:
+             registry.register(tool)
+
+     return MockRunContext(
+         input_messages=[{"role": "user", "content": message}],
+         tool_registry=registry,
+         metadata=metadata or {},
+         params=params or {},
+     )
+
+
+ async def run_agent_test(
+     agent: AgentRuntime,
+     message: str,
+     *,
+     tools: Optional[list[Tool]] = None,
+     metadata: Optional[dict] = None,
+ ) -> tuple[RunResult, MockRunContext]:
+     """
+     Run an agent with a test message and return both result and context.
+
+     Example:
+         result, ctx = await run_agent_test(my_agent, "Hello!")
+         assert "greeting" in result.final_output
+         assert len(ctx.get_events()) > 0
+     """
+     ctx = create_test_context(message, tools=tools, metadata=metadata)
+     result = await agent.run(ctx)
+     return result, ctx
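
Nothing in testing.py wires an LLM into the agent for you; the intended pattern appears to be injecting a MockLLMClient into the agent under test and driving it with run_agent_test. A short sketch under that assumption (LLMEchoAgent is a hypothetical example agent, not part of the package; pytest-asyncio assumed for the async test):

import pytest

from agent_runtime import AgentRuntime, RunContext, RunResult
from agent_runtime.testing import MockLLMClient, MockLLMResponse, run_agent_test


class LLMEchoAgent(AgentRuntime):
    """Minimal example agent that forwards the conversation to an injected LLM client."""

    def __init__(self, llm: MockLLMClient):
        self._llm = llm

    @property
    def key(self) -> str:
        return "llm-echo-agent"

    async def run(self, ctx: RunContext) -> RunResult:
        # The mock returns whatever scripted response is queued next.
        response = await self._llm.generate(ctx.input_messages)
        return RunResult(final_output={"reply": response.message["content"]})


@pytest.mark.asyncio
async def test_agent_returns_scripted_reply():
    llm = MockLLMClient(responses=[MockLLMResponse(content="Scripted reply")])
    result, ctx = await run_agent_test(LLMEchoAgent(llm), "Hello!")

    assert result.final_output["reply"] == "Scripted reply"
    assert llm.get_call_count() == 1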
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "agent-runtime-core"
- version = "0.1.1"
+ version = "0.1.2"
  description = "Framework-agnostic Python library for executing AI agents with consistent patterns"
  readme = "README.md"
  license = "MIT"
tests/test_testing.py (new file)
@@ -0,0 +1,266 @@
+ """Tests for the testing utilities module."""
+
+ import pytest
+ from uuid import UUID
+
+ from agent_runtime import (
+     AgentRuntime,
+     EventType,
+     RunContext,
+     RunResult,
+     Tool,
+     ToolRegistry,
+ )
+ from agent_runtime.testing import (
+     MockRunContext,
+     MockLLMClient,
+     MockLLMResponse,
+     create_test_context,
+     run_agent_test,
+ )
+
+
+ class TestMockRunContext:
+     """Tests for MockRunContext."""
+
+     def test_default_values(self):
+         """Test that MockRunContext has sensible defaults."""
+         ctx = MockRunContext()
+
+         assert isinstance(ctx.run_id, UUID)
+         assert ctx.conversation_id is None
+         assert ctx.input_messages == []
+         assert ctx.params == {}
+         assert ctx.metadata == {}
+         assert isinstance(ctx.tool_registry, ToolRegistry)
+         assert ctx.cancelled() is False
+
+     def test_custom_values(self):
+         """Test MockRunContext with custom values."""
+         ctx = MockRunContext(
+             input_messages=[{"role": "user", "content": "Hello"}],
+             params={"temperature": 0.7},
+             metadata={"user_id": "123"},
+         )
+
+         assert len(ctx.input_messages) == 1
+         assert ctx.input_messages[0]["content"] == "Hello"
+         assert ctx.params["temperature"] == 0.7
+         assert ctx.metadata["user_id"] == "123"
+
+     @pytest.mark.asyncio
+     async def test_emit_events(self):
+         """Test event emission and retrieval."""
+         ctx = MockRunContext()
+
+         await ctx.emit(EventType.RUN_STARTED, {"agent": "test"})
+         await ctx.emit(EventType.TOOL_CALL, {"tool": "search"})
+         await ctx.emit(EventType.RUN_SUCCEEDED, {"result": "done"})
+
+         # Get all events
+         events = ctx.get_events()
+         assert len(events) == 3
+
+         # Filter by type
+         tool_events = ctx.get_events("tool.call")
+         assert len(tool_events) == 1
+         assert tool_events[0][1]["tool"] == "search"
+
+     @pytest.mark.asyncio
+     async def test_checkpoints(self):
+         """Test checkpoint save and retrieval."""
+         ctx = MockRunContext()
+
+         # No checkpoint initially
+         state = await ctx.get_state()
+         assert state is None
+
+         # Save checkpoints
+         await ctx.checkpoint({"step": 1})
+         await ctx.checkpoint({"step": 2})
+
+         # Get latest
+         state = await ctx.get_state()
+         assert state["step"] == 2
+
+         # Get all
+         checkpoints = ctx.get_checkpoints()
+         assert len(checkpoints) == 2
+
+     def test_cancellation(self):
+         """Test cancellation flag."""
+         ctx = MockRunContext()
+
+         assert ctx.cancelled() is False
+         ctx.cancel()
+         assert ctx.cancelled() is True
+
+     @pytest.mark.asyncio
+     async def test_clear(self):
+         """Test clearing recorded data."""
+         ctx = MockRunContext()
+
+         await ctx.emit(EventType.RUN_STARTED, {})
+         await ctx.checkpoint({"step": 1})
+         ctx.cancel()
+
+         ctx.clear()
+
+         assert ctx.get_events() == []
+         assert ctx.get_checkpoints() == []
+         assert ctx.cancelled() is False
+
+
+ class TestMockLLMClient:
+     """Tests for MockLLMClient."""
+
+     @pytest.mark.asyncio
+     async def test_default_response(self):
+         """Test default response when no responses configured."""
+         client = MockLLMClient()
+
+         response = await client.generate([{"role": "user", "content": "Hi"}])
+
+         assert response.message["role"] == "assistant"
+         assert response.message["content"] == "Mock response"
+         assert response.model == "mock-model"
+
+     @pytest.mark.asyncio
+     async def test_predefined_responses(self):
+         """Test cycling through predefined responses."""
+         client = MockLLMClient(responses=[
+             MockLLMResponse(content="First"),
+             MockLLMResponse(content="Second"),
+         ])
+
+         r1 = await client.generate([{"role": "user", "content": "1"}])
+         r2 = await client.generate([{"role": "user", "content": "2"}])
+         r3 = await client.generate([{"role": "user", "content": "3"}])
+
+         assert r1.message["content"] == "First"
+         assert r2.message["content"] == "Second"
+         assert r3.message["content"] == "Mock response"  # Falls back to default
+
+     @pytest.mark.asyncio
+     async def test_response_function(self):
+         """Test dynamic response function."""
+         def respond(messages):
+             content = messages[-1].get("content", "")
+             if "weather" in content.lower():
+                 return MockLLMResponse(content="It's sunny!")
+             return MockLLMResponse(content="I don't know.")
+
+         client = MockLLMClient(response_fn=respond)
+
+         r1 = await client.generate([{"role": "user", "content": "What's the weather?"}])
+         r2 = await client.generate([{"role": "user", "content": "Hello"}])
+
+         assert r1.message["content"] == "It's sunny!"
+         assert r2.message["content"] == "I don't know."
+
+     @pytest.mark.asyncio
+     async def test_tool_calls(self):
+         """Test responses with tool calls."""
+         client = MockLLMClient(responses=[
+             MockLLMResponse(
+                 content="",
+                 tool_calls=[{
+                     "id": "call_1",
+                     "type": "function",
+                     "function": {"name": "search", "arguments": '{"q": "test"}'}
+                 }]
+             )
+         ])
+
+         response = await client.generate([{"role": "user", "content": "Search"}])
+
+         assert response.message["tool_calls"] is not None
+         assert len(response.message["tool_calls"]) == 1
+         assert response.message["tool_calls"][0]["function"]["name"] == "search"
+
+     @pytest.mark.asyncio
+     async def test_call_tracking(self):
+         """Test that calls are recorded."""
+         client = MockLLMClient()
+
+         await client.generate(
+             [{"role": "user", "content": "Hi"}],
+             model="gpt-4",
+             temperature=0.5,
+         )
+         await client.generate([{"role": "user", "content": "Bye"}])
+
+         assert client.get_call_count() == 2
+
+         calls = client.get_calls()
+         assert calls[0]["model"] == "gpt-4"
+         assert calls[1]["messages"][0]["content"] == "Bye"
+
+         client.reset()
+         assert client.get_call_count() == 0
+
+     @pytest.mark.asyncio
+     async def test_streaming(self):
+         """Test streaming responses."""
+         client = MockLLMClient(responses=[
+             MockLLMResponse(content="Hello, world!")
+         ])
+
+         chunks = []
+         async for chunk in client.stream([{"role": "user", "content": "Hi"}]):
+             chunks.append(chunk)
+
+         # Should have content chunks plus final chunk
+         assert len(chunks) >= 2
+         assert chunks[-1].finish_reason == "stop"
+
+         # Reconstruct content
+         content = "".join(c.delta for c in chunks)
+         assert content == "Hello, world!"
+
+
+ class TestHelperFunctions:
+     """Tests for helper functions."""
+
+     def test_create_test_context(self):
+         """Test create_test_context helper."""
+         tool = Tool(
+             name="test_tool",
+             description="A test tool",
+             parameters={"type": "object", "properties": {}},
+             handler=lambda: "result",
+         )
+
+         ctx = create_test_context(
+             "Hello, agent!",
+             tools=[tool],
+             metadata={"user": "test"},
+             params={"mode": "test"},
+         )
+
+         assert ctx.input_messages[0]["content"] == "Hello, agent!"
+         assert ctx.tool_registry.get("test_tool") is not None
+         assert ctx.metadata["user"] == "test"
+         assert ctx.params["mode"] == "test"
+
+     @pytest.mark.asyncio
+     async def test_run_agent_test(self):
+         """Test run_agent_test helper."""
+
+         class TestAgent(AgentRuntime):
+             @property
+             def key(self) -> str:
+                 return "test-agent"
+
+             async def run(self, ctx: RunContext) -> RunResult:
+                 await ctx.emit(EventType.RUN_STARTED, {})
+                 return RunResult(
+                     final_output={"echo": ctx.input_messages[0]["content"]}
+                 )
+
+         agent = TestAgent()
+         result, ctx = await run_agent_test(agent, "Hello!")
+
+         assert result.final_output["echo"] == "Hello!"
+         assert len(ctx.get_events()) == 1
+         assert ctx.get_events()[0][0] == "run.started"
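
The new test suite does not cover LLMEvaluator. Since evaluate() only checks whether the judge's reply starts with "PASS", it can be exercised offline by scripting the judge with MockLLMClient; a sketch of that approach (pytest-asyncio assumed):

import pytest

from agent_runtime.testing import LLMEvaluator, MockLLMClient, MockLLMResponse


@pytest.mark.asyncio
async def test_evaluator_with_scripted_judge():
    # The judge is a MockLLMClient, so no real model call is made.
    judge = MockLLMClient(responses=[MockLLMResponse(content="PASS Mentions the temperature.")])
    evaluator = LLMEvaluator(judge)

    passed, explanation = await evaluator.evaluate(
        user_query="What's the weather?",
        agent_response="It's 72°F and sunny.",
        criteria="The response should include a temperature",
    )

    assert passed is True
    assert explanation.startswith("PASS")


@pytest.mark.asyncio
async def test_evaluator_checks_expected_tools():
    # evaluate_tool_usage is pure set logic; the client is never called here.
    evaluator = LLMEvaluator(MockLLMClient())

    passed, explanation = await evaluator.evaluate_tool_usage(
        user_query="Search for cats",
        tool_calls=[{"function": {"name": "search", "arguments": "{}"}}],
        expected_tools=["search"],
    )

    assert passed is True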