agent-runtime-core 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agent_runtime/__init__.py CHANGED
@@ -34,7 +34,7 @@ Example usage:
         return RunResult(final_output={"message": "Hello!"})
 """
 
-__version__ = "0.1.1"
+__version__ = "0.1.3"
 
 # Core interfaces
 from agent_runtime.interfaces import (
@@ -76,6 +76,17 @@ from agent_runtime.runner import (
     RunContextImpl,
 )
 
+
+# Testing utilities
+from agent_runtime.testing import (
+    MockRunContext,
+    MockLLMClient,
+    MockLLMResponse,
+    LLMEvaluator,
+    create_test_context,
+    run_agent_test,
+)
+
 __all__ = [
     # Version
     "__version__",
@@ -107,4 +118,11 @@ __all__ = [
     "AgentRunner",
     "RunnerConfig",
     "RunContextImpl",
+    # Testing
+    "MockRunContext",
+    "MockLLMClient",
+    "MockLLMResponse",
+    "LLMEvaluator",
+    "create_test_context",
+    "run_agent_test",
 ]
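With these re-exports, the new testing helpers are importable from the package root as well as from agent_runtime.testing. A minimal sketch of the import surface this hunk adds (the agent under test is whatever AgentRuntime implementation you supply; none is assumed here):

    from agent_runtime import MockLLMClient, MockLLMResponse, create_test_context

    # Scripted LLM plus a ready-made context, to be handed to your own agent.
    llm = MockLLMClient(responses=[MockLLMResponse(content="Hello!")])
    ctx = create_test_context("Hi there", metadata={"user_id": "123"})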
agent_runtime/interfaces.py CHANGED
@@ -267,6 +267,10 @@ class ToolRegistry:
             for tool in self._tools.values()
         ]
 
+    def get_tool_definitions(self) -> list[dict]:
+        """Alias for to_openai_format() for backwards compatibility."""
+        return self.to_openai_format()
+
     async def execute(self, name: str, arguments: dict) -> Any:
         """
         Execute a tool by name.
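get_tool_definitions() is a pure alias, so both spellings return the same OpenAI-style tool list. A short sketch, assuming weather_tool is some already-constructed Tool instance (not part of this diff):

    registry = ToolRegistry()
    registry.register(weather_tool)  # hypothetical Tool instance
    assert registry.get_tool_definitions() == registry.to_openai_format()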
agent_runtime/testing.py ADDED
@@ -0,0 +1,358 @@
+"""
+Testing utilities for agent runtimes.
+
+This module provides tools for testing agent implementations:
+- MockRunContext: A concrete RunContext for unit tests
+- MockLLMClient: A mock LLM client with predefined responses
+- AgentTestCase: Base test class with common helpers
+- LLMEvaluator: Use LLM to evaluate agent responses
+
+Example usage:
+    from agent_runtime.testing import MockRunContext, MockLLMClient, AgentTestCase
+
+    class TestMyAgent(AgentTestCase):
+        async def test_agent_responds(self):
+            ctx = self.create_context("Hello, agent!")
+            result = await self.agent.run(ctx)
+            self.assertIn("response", result.final_output)
+"""
+
+import asyncio
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional, AsyncIterator
+from uuid import UUID, uuid4
+import json
+
+from .interfaces import (
+    AgentRuntime,
+    EventType,
+    LLMClient,
+    LLMResponse,
+    LLMStreamChunk,
+    Message,
+    RunContext,
+    RunResult,
+    Tool,
+    ToolRegistry,
+)
+
+
+@dataclass
+class MockRunContext:
+    """
+    A concrete implementation of RunContext for testing.
+
+    Use this in unit tests to provide a context to your agent
+    without needing the full runtime infrastructure.
+
+    Example:
+        ctx = MockRunContext(
+            input_messages=[{"role": "user", "content": "Hello"}],
+            metadata={"user_id": "123"}
+        )
+        result = await my_agent.run(ctx)
+    """
+
+    input_messages: list[Message] = field(default_factory=list)
+    params: dict = field(default_factory=dict)
+    metadata: dict = field(default_factory=dict)
+    run_id: UUID = field(default_factory=uuid4)
+    conversation_id: Optional[UUID] = None
+    tool_registry: ToolRegistry = field(default_factory=ToolRegistry)
+
+    # Internal state
+    _events: list[tuple[str, dict]] = field(default_factory=list)
+    _checkpoints: list[dict] = field(default_factory=list)
+    _cancelled: bool = False
+
+    async def emit(self, event_type: EventType | str, payload: dict) -> None:
+        """Record emitted events for later inspection."""
+        event_name = event_type.value if isinstance(event_type, EventType) else event_type
+        self._events.append((event_name, payload))
+
+    async def checkpoint(self, state: dict) -> None:
+        """Save a checkpoint."""
+        self._checkpoints.append(state)
+
+    async def get_state(self) -> Optional[dict]:
+        """Get the last checkpoint."""
+        return self._checkpoints[-1] if self._checkpoints else None
+
+    def cancelled(self) -> bool:
+        """Check if cancelled."""
+        return self._cancelled
+
+    def cancel(self) -> None:
+        """Request cancellation."""
+        self._cancelled = True
+
+    # Test helpers
+    def get_events(self, event_type: Optional[str] = None) -> list[tuple[str, dict]]:
+        """Get recorded events, optionally filtered by type."""
+        if event_type is None:
+            return self._events
+        return [(t, p) for t, p in self._events if t == event_type]
+
+    def get_checkpoints(self) -> list[dict]:
+        """Get all checkpoints."""
+        return self._checkpoints
+
+    def clear(self) -> None:
+        """Clear recorded events and checkpoints."""
+        self._events.clear()
+        self._checkpoints.clear()
+        self._cancelled = False
+
+
+@dataclass
+class MockLLMResponse:
+    """A predefined response for MockLLMClient."""
+    content: str
+    tool_calls: Optional[list[dict]] = None
+    finish_reason: str = "stop"
+
+
+class MockLLMClient(LLMClient):
+    """
+    A mock LLM client for testing.
+
+    Configure with predefined responses or a response function.
+
+    Example:
+        # Simple predefined responses
+        client = MockLLMClient(responses=[
+            MockLLMResponse(content="Hello!"),
+            MockLLMResponse(content="How can I help?"),
+        ])
+
+        # Dynamic responses based on input
+        def respond(messages):
+            if "weather" in messages[-1]["content"].lower():
+                return MockLLMResponse(content="It's sunny!")
+            return MockLLMResponse(content="I don't know.")
+
+        client = MockLLMClient(response_fn=respond)
+    """
+
+    def __init__(
+        self,
+        responses: Optional[list[MockLLMResponse]] = None,
+        response_fn: Optional[Callable[[list[Message]], MockLLMResponse]] = None,
+        default_response: str = "Mock response",
+    ):
+        self._responses = responses or []
+        self._response_fn = response_fn
+        self._default_response = default_response
+        self._call_count = 0
+        self._calls: list[dict] = []
+
+    async def generate(
+        self,
+        messages: list[Message],
+        *,
+        model: Optional[str] = None,
+        stream: bool = False,
+        tools: Optional[list[dict]] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        **kwargs,
+    ) -> LLMResponse:
+        """Generate a mock response."""
+        # Record the call
+        self._calls.append({
+            "messages": messages,
+            "model": model,
+            "tools": tools,
+            "kwargs": kwargs,
+        })
+
+        # Get response
+        if self._response_fn:
+            mock_resp = self._response_fn(messages)
+        elif self._call_count < len(self._responses):
+            mock_resp = self._responses[self._call_count]
+        else:
+            mock_resp = MockLLMResponse(content=self._default_response)
+
+        self._call_count += 1
+
+        # Build message
+        message: Message = {
+            "role": "assistant",
+            "content": mock_resp.content,
+        }
+        if mock_resp.tool_calls:
+            message["tool_calls"] = mock_resp.tool_calls
+
+        return LLMResponse(
+            message=message,
+            model=model or "mock-model",
+            finish_reason=mock_resp.finish_reason,
+            usage={"prompt_tokens": 10, "completion_tokens": 20},
+        )
+
+    async def stream(
+        self,
+        messages: list[Message],
+        *,
+        model: Optional[str] = None,
+        tools: Optional[list[dict]] = None,
+        **kwargs,
+    ) -> AsyncIterator[LLMStreamChunk]:
+        """Stream a mock response (yields content in chunks)."""
+        response = await self.generate(messages, model=model, tools=tools, **kwargs)
+        content = response.message.get("content", "")
+
+        # Yield content in chunks
+        for i in range(0, len(content), 10):
+            yield LLMStreamChunk(delta=content[i:i+10])
+
+        yield LLMStreamChunk(finish_reason="stop", usage=response.usage)
+
+    # Test helpers
+    def get_calls(self) -> list[dict]:
+        """Get all recorded calls."""
+        return self._calls
+
+    def get_call_count(self) -> int:
+        """Get the number of calls made."""
+        return self._call_count
+
+    def reset(self) -> None:
+        """Reset call tracking."""
+        self._call_count = 0
+        self._calls.clear()
+
+
+class LLMEvaluator:
+    """
+    Use an LLM to evaluate agent responses.
+
+    This is useful for testing that agent responses meet certain criteria
+    without having to write brittle string matching tests.
+
+    Example:
+        evaluator = LLMEvaluator(openai_client)
+
+        passed, explanation = await evaluator.evaluate(
+            user_query="What's the weather?",
+            agent_response="It's currently 72°F and sunny in San Francisco.",
+            criteria="The response should include temperature and weather conditions"
+        )
+
+        assert passed, f"Evaluation failed: {explanation}"
+    """
+
+    def __init__(self, llm_client: LLMClient, model: str = "gpt-4o-mini"):
+        self._client = llm_client
+        self._model = model
+
+    async def evaluate(
+        self,
+        user_query: str,
+        agent_response: str,
+        criteria: str,
+    ) -> tuple[bool, str]:
+        """
+        Evaluate an agent response against criteria.
+
+        Args:
+            user_query: The original user query
+            agent_response: The agent's response
+            criteria: What the response should satisfy
+
+        Returns:
+            Tuple of (passed: bool, explanation: str)
+        """
+        eval_prompt = f"""You are evaluating an AI assistant's response.
+
+User Query: {user_query}
+
+Agent Response: {agent_response}
+
+Evaluation Criteria: {criteria}
+
+Does the response meet the criteria? Answer with just "PASS" or "FAIL" followed by a brief explanation."""
+
+        response = await self._client.generate(
+            messages=[{"role": "user", "content": eval_prompt}],
+            model=self._model,
+            temperature=0,
+        )
+
+        result = response.message.get("content", "FAIL Unknown error")
+        passed = result.strip().upper().startswith("PASS")
+        return passed, result
+
+    async def evaluate_tool_usage(
+        self,
+        user_query: str,
+        tool_calls: list[dict],
+        expected_tools: list[str],
+    ) -> tuple[bool, str]:
+        """
+        Evaluate whether the agent used the expected tools.
+
+        Args:
+            user_query: The original user query
+            tool_calls: List of tool calls made by the agent
+            expected_tools: List of tool names that should have been called
+
+        Returns:
+            Tuple of (passed: bool, explanation: str)
+        """
+        tool_names = [tc.get("function", {}).get("name", tc.get("name", "unknown"))
+                      for tc in tool_calls]
+
+        missing = set(expected_tools) - set(tool_names)
+        if missing:
+            return False, f"Missing expected tools: {missing}. Called: {tool_names}"
+
+        return True, f"All expected tools were called: {tool_names}"
+
+
+def create_test_context(
+    message: str,
+    *,
+    tools: Optional[list[Tool]] = None,
+    metadata: Optional[dict] = None,
+    params: Optional[dict] = None,
+) -> MockRunContext:
+    """
+    Convenience function to create a test context.
+
+    Example:
+        ctx = create_test_context("Hello, agent!", tools=[my_tool])
+        result = await agent.run(ctx)
+    """
+    registry = ToolRegistry()
+    if tools:
+        for tool in tools:
+            registry.register(tool)
+
+    return MockRunContext(
+        input_messages=[{"role": "user", "content": message}],
+        tool_registry=registry,
+        metadata=metadata or {},
+        params=params or {},
+    )
+
+
+async def run_agent_test(
+    agent: AgentRuntime,
+    message: str,
+    *,
+    tools: Optional[list[Tool]] = None,
+    metadata: Optional[dict] = None,
+) -> tuple[RunResult, MockRunContext]:
+    """
+    Run an agent with a test message and return both result and context.
+
+    Example:
+        result, ctx = await run_agent_test(my_agent, "Hello!")
+        assert "greeting" in result.final_output
+        assert len(ctx.get_events()) > 0
+    """
+    ctx = create_test_context(message, tools=tools, metadata=metadata)
+    result = await agent.run(ctx)
+    return result, ctx
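Taken together, the module lets an agent be exercised end to end without a live model. A hedged sketch under the assumption that MyAgent is your own AgentRuntime implementation that accepts an LLMClient in its constructor (neither name is defined by this package):

    import asyncio
    from agent_runtime.testing import MockLLMClient, MockLLMResponse, run_agent_test

    async def main():
        # Scripted response so the test is deterministic and runs offline.
        llm = MockLLMClient(responses=[MockLLMResponse(content="It's sunny!")])
        agent = MyAgent(llm_client=llm)  # placeholder for your agent class
        result, ctx = await run_agent_test(agent, "What's the weather?")
        print(result.final_output)
        assert llm.get_call_count() >= 1      # the agent consulted the mock LLM
        assert ctx.get_events() is not None   # events recorded by MockRunContext

    asyncio.run(main())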
agent_runtime_core-0.1.1.dist-info/METADATA → agent_runtime_core-0.1.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-runtime-core
-Version: 0.1.1
+Version: 0.1.3
 Summary: Framework-agnostic Python library for executing AI agents with consistent patterns
 Project-URL: Homepage, https://github.com/colstrom/agent_runtime
 Project-URL: Repository, https://github.com/colstrom/agent_runtime
agent_runtime_core-0.1.1.dist-info/RECORD → agent_runtime_core-0.1.3.dist-info/RECORD RENAMED
@@ -1,8 +1,9 @@
-agent_runtime/__init__.py,sha256=OKBk91ejYoCz7hoMY1S8klTsP3WtICXz3-jeN4R0PiM,2098
+agent_runtime/__init__.py,sha256=KZxgCDGNeY8kXH-OGlTguIGwRHQPKz-fCgKQq_a0Ld8,2430
 agent_runtime/config.py,sha256=ZRjpILjsjeh_kl7873DtV2g_zaTrfdkb3NgdQ6ndb5Y,4981
-agent_runtime/interfaces.py,sha256=AGDY0w6muQnNiice9O3ogb8fRdgMRW6wqXpxcuyn0N0,10103
+agent_runtime/interfaces.py,sha256=_sRH34NfnD8AVtRLyDXq2NCAPjatJ8PC-MSjfAlQ37s,10265
 agent_runtime/registry.py,sha256=sa0speDFxFCZlXoCge8cPNqWYUeWHyazs6tBer5Jg1w,1471
 agent_runtime/runner.py,sha256=Sb2FfSJvATaL7ideQZy2JhVZp0sSYGVIov93E-gxODU,12741
+agent_runtime/testing.py,sha256=aqN67RdbTdYf_rJfp5pEpn2s_tkeU-3oSpzTdADxH5g,11126
 agent_runtime/events/__init__.py,sha256=JNH-D40O6yz2evIf1_r2o3w7OQjLt4Yebn-sBNLzzh8,1550
 agent_runtime/events/base.py,sha256=NfHYyoczxr40Er5emROi_aY_07m5hDrKsn31pdWY2DY,1950
 agent_runtime/events/memory.py,sha256=7qseR6RtdaP833FxEHwyPw5TC7l4brJHr8uEx0mLc1Y,2486
@@ -25,7 +26,7 @@ agent_runtime/state/sqlite.py,sha256=NwuiTBXELb2tyOoH91MZqRJaCk9h8PskyY2VUc5EMr0
 agent_runtime/tracing/__init__.py,sha256=m4WzfgJpnV5XCCoMpBYZdJU_JTkAdhEhl7M7tpf62RY,1246
 agent_runtime/tracing/langfuse.py,sha256=uThF0P6f1VJ1l1b7UuiFQ-oHZ-tCa9MbbHvTqkSuQ2A,3650
 agent_runtime/tracing/noop.py,sha256=MOm5eTrnf3d4WhiWrwVU5Kd3GmJ1903V0U7U3Qwho7U,746
-agent_runtime_core-0.1.1.dist-info/METADATA,sha256=0zjX3lE1GoR2nmSbImsBx_IuA5ZH2jsQ8NdVPG4wOSI,12478
-agent_runtime_core-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-agent_runtime_core-0.1.1.dist-info/licenses/LICENSE,sha256=PcOO8aiOZ4H2MWYeKIis3o6xTCT1hNkDyCxHZhh1NeM,1070
-agent_runtime_core-0.1.1.dist-info/RECORD,,
+agent_runtime_core-0.1.3.dist-info/METADATA,sha256=BIGLruppweXY5x9EupTMY7Ru8KwiViIKyxuz92BLIqk,12478
+agent_runtime_core-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+agent_runtime_core-0.1.3.dist-info/licenses/LICENSE,sha256=PcOO8aiOZ4H2MWYeKIis3o6xTCT1hNkDyCxHZhh1NeM,1070
+agent_runtime_core-0.1.3.dist-info/RECORD,,