agentassert-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentassert-0.1.0.dist-info/METADATA +395 -0
- agentassert-0.1.0.dist-info/RECORD +35 -0
- agentassert-0.1.0.dist-info/WHEEL +4 -0
- agentassert-0.1.0.dist-info/entry_points.txt +2 -0
- agentassert-0.1.0.dist-info/licenses/LICENSE +34 -0
- agentunit/__init__.py +102 -0
- agentunit/_version.py +4 -0
- agentunit/adapters/__init__.py +6 -0
- agentunit/adapters/base.py +33 -0
- agentunit/adapters/generic.py +105 -0
- agentunit/assertions/__init__.py +56 -0
- agentunit/assertions/behavior.py +206 -0
- agentunit/assertions/fluent.py +210 -0
- agentunit/assertions/matchers.py +365 -0
- agentunit/cli/__init__.py +5 -0
- agentunit/cli/commands/__init__.py +5 -0
- agentunit/cli/commands/run.py +125 -0
- agentunit/cli/main.py +37 -0
- agentunit/core/__init__.py +16 -0
- agentunit/core/collector.py +242 -0
- agentunit/core/item.py +71 -0
- agentunit/core/outcome.py +121 -0
- agentunit/core/runner.py +179 -0
- agentunit/core/session.py +135 -0
- agentunit/decorators.py +69 -0
- agentunit/fixtures.py +97 -0
- agentunit/mocks/__init__.py +10 -0
- agentunit/mocks/tool_mock.py +323 -0
- agentunit/reporters/__init__.py +5 -0
- agentunit/reporters/terminal.py +195 -0
- agentunit/trace/__init__.py +17 -0
- agentunit/trace/event.py +105 -0
- agentunit/trace/span.py +71 -0
- agentunit/trace/tracer.py +308 -0
- agentunit/trace/tree.py +153 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentassert
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Behavioral testing framework for AI agents — pytest for AI agents
|
|
5
|
+
Project-URL: Homepage, https://github.com/kaushikdhola/agentunit
|
|
6
|
+
Project-URL: Documentation, https://github.com/kaushikdhola/agentunit#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/kaushikdhola/agentunit.git
|
|
8
|
+
Project-URL: Issues, https://github.com/kaushikdhola/agentunit/issues
|
|
9
|
+
Author: Kaushik Dhola
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agents,ai,autogen,behavioral-testing,crewai,langchain,llm,testing
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Testing
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: click>=8.1.0
|
|
26
|
+
Requires-Dist: pluggy>=1.3.0
|
|
27
|
+
Requires-Dist: pydantic>=2.0.0
|
|
28
|
+
Requires-Dist: rich>=13.0.0
|
|
29
|
+
Provides-Extra: all-adapters
|
|
30
|
+
Requires-Dist: agentassert[autogen,crewai,langchain,llamaindex]; extra == 'all-adapters'
|
|
31
|
+
Provides-Extra: autogen
|
|
32
|
+
Requires-Dist: pyautogen>=0.4; extra == 'autogen'
|
|
33
|
+
Provides-Extra: crewai
|
|
34
|
+
Requires-Dist: crewai>=0.80; extra == 'crewai'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: mypy>=1.8.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pre-commit>=3.6.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff>=0.3.0; extra == 'dev'
|
|
41
|
+
Provides-Extra: langchain
|
|
42
|
+
Requires-Dist: langchain-core>=0.3; extra == 'langchain'
|
|
43
|
+
Requires-Dist: langchain>=0.3; extra == 'langchain'
|
|
44
|
+
Provides-Extra: llamaindex
|
|
45
|
+
Requires-Dist: llama-index>=0.12; extra == 'llamaindex'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
<p align="center">
|
|
49
|
+
<img src="https://img.shields.io/badge/AgentUnit-v0.1.0-blue?style=for-the-badge" alt="Version">
|
|
50
|
+
<img src="https://img.shields.io/badge/python-3.10+-green?style=for-the-badge&logo=python&logoColor=white" alt="Python">
|
|
51
|
+
<img src="https://img.shields.io/badge/license-MIT-orange?style=for-the-badge" alt="License">
|
|
52
|
+
</p>
|
|
53
|
+
|
|
54
|
+
<h1 align="center">AgentUnit</h1>
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<strong>The Behavioral Testing Framework for AI Agents</strong>
|
|
58
|
+
</p>
|
|
59
|
+
|
|
60
|
+
<p align="center">
|
|
61
|
+
<em>"pytest for AI Agents" — Write tests. Run agents. Ship with confidence.</em>
|
|
62
|
+
</p>
|
|
63
|
+
|
|
64
|
+
<p align="center">
|
|
65
|
+
<a href="#quick-start">Quick Start</a> •
|
|
66
|
+
<a href="#features">Features</a> •
|
|
67
|
+
<a href="#documentation">Documentation</a> •
|
|
68
|
+
<a href="#contributing">Contributing</a>
|
|
69
|
+
</p>
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## The Problem
|
|
74
|
+
|
|
75
|
+
Teams building AI agents face a critical gap in their development workflow:
|
|
76
|
+
|
|
77
|
+
| What Exists | What's Missing |
|
|
78
|
+
|-------------|----------------|
|
|
79
|
+
| Observability (LangSmith, Langfuse) | Behavioral test runners |
|
|
80
|
+
| Evaluation dashboards | CI/CD pass/fail gates |
|
|
81
|
+
| LLM output quality metrics | Agent execution path testing |
|
|
82
|
+
|
|
83
|
+
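**AgentUnit fills this gap.** It's an open-source, framework-agnostic, local-first behavioral test framework designed specifically for AI agent pipelines. Note that the distribution is named `agentassert` (that is what you `pip install`), while the import package and the CLI command are both `agentunit`.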
|
|
84
|
+
|
|
85
|
+
## Why AgentUnit?
|
|
86
|
+
|
|
87
|
+
- **Framework Agnostic** — Works with LangChain, CrewAI, AutoGen, LlamaIndex, or raw API calls
|
|
88
|
+
- **Local First** — No cloud accounts, no dashboards, no external services required
|
|
89
|
+
- **CI/CD Native** — Designed for `git push → test → deploy` workflows
|
|
90
|
+
- **Deterministic** — Seeded execution for reproducible test runs
|
|
91
|
+
- **Developer Friendly** — Familiar pytest-like syntax and workflow
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Quick Start
|
|
96
|
+
|
|
97
|
+
### Installation
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install agentassert
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Write Your First Test
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
# tests/test_my_agent.py
|
|
107
|
+
from agentunit import agent_test, expect, mock_tool, contains
|
|
108
|
+
|
|
109
|
+
@agent_test
|
|
110
|
+
def test_research_agent_workflow(agent_harness):
|
|
111
|
+
"""Test that the research agent follows the correct tool sequence."""
|
|
112
|
+
|
|
113
|
+
# 1. Create mock tools with deterministic responses
|
|
114
|
+
search = mock_tool("web_search", returns={"results": ["AI breakthrough news"]})
|
|
115
|
+
summarize = mock_tool("summarize", returns="Key finding: AI is advancing rapidly")
|
|
116
|
+
|
|
117
|
+
# 2. Run your agent under test
|
|
118
|
+
trace = agent_harness.run(
|
|
119
|
+
agent=my_research_agent,
|
|
120
|
+
input="Find the latest AI news",
|
|
121
|
+
tools=[search, summarize]
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# 3. Assert on behavioral expectations
|
|
125
|
+
expect(trace).tool("web_search").was_called()
|
|
126
|
+
expect(trace).tool("web_search").called_before("summarize")
|
|
127
|
+
expect(trace).tool("web_search").called_with(query=contains("AI"))
|
|
128
|
+
expect(trace).completed_within_steps(10)
|
|
129
|
+
expect(trace).output.not_empty()
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Run Your Tests
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
$ agentunit run tests/
|
|
136
|
+
|
|
137
|
+
AgentUnit v0.1.0 — Behavioral Testing Framework for AI Agents
|
|
138
|
+
collecting ... 3 tests
|
|
139
|
+
|
|
140
|
+
tests/test_my_agent.py
|
|
141
|
+
✓ test_research_agent_workflow (2 steps, $0.002, 0.3s)
|
|
142
|
+
✓ test_handles_api_failure (1 steps, $0.001, 0.1s)
|
|
143
|
+
✓ test_stays_within_budget (4 steps, $0.008, 0.5s)
|
|
144
|
+
|
|
145
|
+
════════════════════════════════════════════════════════════
|
|
146
|
+
3 passed in 0.9s
|
|
147
|
+
════════════════════════════════════════════════════════════
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Features
|
|
153
|
+
|
|
154
|
+
### Mock Tools
|
|
155
|
+
|
|
156
|
+
Create deterministic tool responses for predictable testing:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
# Static response
|
|
160
|
+
search = mock_tool("web_search", returns={"results": ["item1", "item2"]})
|
|
161
|
+
|
|
162
|
+
# Sequential responses
|
|
163
|
+
api = mock_tool("api_call", returns_sequence=[
|
|
164
|
+
{"status": "pending"},
|
|
165
|
+
{"status": "processing"},
|
|
166
|
+
{"status": "complete"}
|
|
167
|
+
])
|
|
168
|
+
|
|
169
|
+
# Simulate failures
|
|
170
|
+
flaky_api = mock_tool("external_service", raises=ConnectionError("timeout"))
|
|
171
|
+
|
|
172
|
+
# Rate limiting simulation
|
|
173
|
+
limited_api = mock_tool("rate_limited_api", returns="ok", fail_after=5)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Behavioral Assertions
|
|
177
|
+
|
|
178
|
+
Assert on how your agent behaves, not just what it outputs:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
# Tool invocation assertions
|
|
182
|
+
expect(trace).tool("search").was_called()
|
|
183
|
+
expect(trace).tool("search").was_not_called()
|
|
184
|
+
expect(trace).tool("search").called_exactly(3)
|
|
185
|
+
expect(trace).tool("search").called_at_least(1)
|
|
186
|
+
|
|
187
|
+
# Execution order assertions
|
|
188
|
+
expect(trace).tool("fetch_data").called_before("process_data")
|
|
189
|
+
expect(trace).tool("cleanup").called_after("main_task")
|
|
190
|
+
|
|
191
|
+
# Argument matching with flexible matchers
|
|
192
|
+
expect(trace).tool("api").called_with(
|
|
193
|
+
query=contains("search term"),
|
|
194
|
+
limit=greater_than(0),
|
|
195
|
+
filters=has_key("category")
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# Execution behavior assertions
|
|
199
|
+
expect(trace).completed()
|
|
200
|
+
expect(trace).completed_within_steps(15)
|
|
201
|
+
expect(trace).failed_gracefully()
|
|
202
|
+
|
|
203
|
+
# Output assertions
|
|
204
|
+
expect(trace).output.not_empty()
|
|
205
|
+
expect(trace).output.contains("success")
|
|
206
|
+
expect(trace).output.is_valid_json()
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Rich Matchers
|
|
210
|
+
|
|
211
|
+
Flexible matchers for complex assertion scenarios:
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from agentunit import (
|
|
215
|
+
# String matchers
|
|
216
|
+
contains, matches, starts_with, ends_with, any_string,
|
|
217
|
+
|
|
218
|
+
# Numeric matchers
|
|
219
|
+
greater_than, less_than, between,
|
|
220
|
+
|
|
221
|
+
# Collection matchers
|
|
222
|
+
has_key, has_length, contains_item,
|
|
223
|
+
|
|
224
|
+
# Logical matchers
|
|
225
|
+
all_of, any_of, not_, anything
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Combine matchers for precise assertions
|
|
229
|
+
expect(trace).tool("search").called_with(
|
|
230
|
+
query=all_of(
|
|
231
|
+
starts_with("user:"),
|
|
232
|
+
contains("search"),
|
|
233
|
+
not_(contains("admin"))
|
|
234
|
+
)
|
|
235
|
+
)
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## CLI Reference
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
# Run all tests in current directory
|
|
244
|
+
agentunit run
|
|
245
|
+
|
|
246
|
+
# Run tests in a specific directory
|
|
247
|
+
agentunit run tests/
|
|
248
|
+
|
|
249
|
+
# Run a specific test file
|
|
250
|
+
agentunit run tests/test_research_agent.py
|
|
251
|
+
|
|
252
|
+
# Filter tests by keyword
|
|
253
|
+
agentunit run -k "search"
|
|
254
|
+
|
|
255
|
+
# Verbose output with full tracebacks
|
|
256
|
+
agentunit run -v
|
|
257
|
+
|
|
258
|
+
# Minimal output (dots only)
|
|
259
|
+
agentunit run -q
|
|
260
|
+
|
|
261
|
+
# Set random seed for reproducibility
|
|
262
|
+
agentunit run --seed 12345
|
|
263
|
+
|
|
264
|
+
# Stop on first failure
|
|
265
|
+
agentunit run -x
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Framework Integration
|
|
271
|
+
|
|
272
|
+
AgentUnit is designed to work with any agent framework:
|
|
273
|
+
|
|
274
|
+
<table>
|
|
275
|
+
<tr>
|
|
276
|
+
<td width="50%">
|
|
277
|
+
|
|
278
|
+
**LangChain**
|
|
279
|
+
```python
|
|
280
|
+
@agent_test
|
|
281
|
+
def test_langchain_agent(agent_harness):
|
|
282
|
+
from langchain.agents import AgentExecutor
|
|
283
|
+
|
|
284
|
+
trace = agent_harness.run(
|
|
285
|
+
agent=my_langchain_agent,
|
|
286
|
+
input="Analyze this data"
|
|
287
|
+
)
|
|
288
|
+
expect(trace).completed()
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
</td>
|
|
292
|
+
<td width="50%">
|
|
293
|
+
|
|
294
|
+
**Custom Agents**
|
|
295
|
+
```python
|
|
296
|
+
@agent_test
|
|
297
|
+
def test_custom_agent(agent_harness):
|
|
298
|
+
def my_agent(prompt, tools):
|
|
299
|
+
# Your custom logic
|
|
300
|
+
return result
|
|
301
|
+
|
|
302
|
+
trace = agent_harness.run(
|
|
303
|
+
agent=my_agent,
|
|
304
|
+
input="Process request"
|
|
305
|
+
)
|
|
306
|
+
expect(trace).completed()
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
</td>
|
|
310
|
+
</tr>
|
|
311
|
+
</table>
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
## CI/CD Integration
|
|
316
|
+
|
|
317
|
+
### GitHub Actions
|
|
318
|
+
|
|
319
|
+
```yaml
|
|
320
|
+
name: Agent Tests
|
|
321
|
+
|
|
322
|
+
on: [push, pull_request]
|
|
323
|
+
|
|
324
|
+
jobs:
|
|
325
|
+
test:
|
|
326
|
+
runs-on: ubuntu-latest
|
|
327
|
+
steps:
|
|
328
|
+
- uses: actions/checkout@v4
|
|
329
|
+
|
|
330
|
+
- name: Set up Python
|
|
331
|
+
uses: actions/setup-python@v5
|
|
332
|
+
with:
|
|
333
|
+
python-version: '3.11'
|
|
334
|
+
|
|
335
|
+
- name: Install dependencies
|
|
336
|
+
run: |
|
|
337
|
+
pip install agentassert
|
|
338
|
+
pip install -r requirements.txt
|
|
339
|
+
|
|
340
|
+
- name: Run agent tests
|
|
341
|
+
run: agentunit run tests/ -v
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
### GitLab CI
|
|
345
|
+
|
|
346
|
+
```yaml
|
|
347
|
+
agent-tests:
|
|
348
|
+
image: python:3.11
|
|
349
|
+
script:
|
|
350
|
+
- pip install agentassert
|
|
351
|
+
- agentunit run tests/
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
## Comparison with Alternatives
|
|
357
|
+
|
|
358
|
+
| Feature | AgentUnit | LangSmith | Langfuse | DeepEval |
|
|
359
|
+
|---------|-----------|-----------|----------|----------|
|
|
360
|
+
| Local execution | Yes | No | Partial | Yes |
|
|
361
|
+
| No account required | Yes | No | No | Yes |
|
|
362
|
+
| Framework agnostic | Yes | No | Yes | Yes |
|
|
363
|
+
| Behavioral assertions | Yes | No | No | No |
|
|
364
|
+
| Tool call testing | Yes | Partial | Partial | No |
|
|
365
|
+
| CI/CD native | Yes | Partial | Partial | Yes |
|
|
366
|
+
| Deterministic replay | Yes | No | No | No |
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
## Documentation
|
|
371
|
+
|
|
372
|
+
- **[Sample Tests](tests/test_sample.py)** — Working examples to get started
|
|
373
|
+
- **[Contributing Guide](CONTRIBUTING.md)** — How to contribute to the project
|
|
374
|
+
|
|
375
|
+
---
|
|
376
|
+
|
|
377
|
+
## Contributing
|
|
378
|
+
|
|
379
|
+
We welcome contributions from the community. Please read our [Contributing Guidelines](CONTRIBUTING.md) before submitting PRs.
|
|
380
|
+
|
|
381
|
+
---
|
|
382
|
+
|
|
383
|
+
## License
|
|
384
|
+
|
|
385
|
+
AgentUnit is released under the **MIT License**. See [LICENSE](LICENSE) for details.
|
|
386
|
+
|
|
387
|
+
---
|
|
388
|
+
|
|
389
|
+
<p align="center">
|
|
390
|
+
<strong>Built by <a href="https://github.com/kaushikdhola">Kaushik Dhola</a></strong>
|
|
391
|
+
</p>
|
|
392
|
+
|
|
393
|
+
<p align="center">
|
|
394
|
+
<code>pip install agentassert</code>
|
|
395
|
+
</p>
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
agentunit/__init__.py,sha256=ykv-40E7xGuiGRFP6dlyuvFqguWAzLDfjSKKSVVSpE8,2154
|
|
2
|
+
agentunit/_version.py,sha256=aEtcAlwTYV6zYLWc8oarN4Co2KKHe167d4i5OirqLEg,129
|
|
3
|
+
agentunit/decorators.py,sha256=TWvkjDR3tddWHjY9DKZLJXqg7nzcFC5dzNh9f6zrcHc,1888
|
|
4
|
+
agentunit/fixtures.py,sha256=x6Q6SfWnuI5ZxX-0xRX15y_nywLOkTCu7-Kbe3LnqyI,3061
|
|
5
|
+
agentunit/adapters/__init__.py,sha256=n6EL9g7m9E9rbORV1xWrFffeuytzVK9hsLev7D47xsY,188
|
|
6
|
+
agentunit/adapters/base.py,sha256=kJIP5F1UrvKCdgzAzIOmfEnl_lb1LfOh5W54j0OF-mU,963
|
|
7
|
+
agentunit/adapters/generic.py,sha256=GmqyxsaVtT8k_KLD7p7SRmrpajGWSnTot-qtedS1eo0,3715
|
|
8
|
+
agentunit/assertions/__init__.py,sha256=JltpJE75KlRy1Kf-M22D3tuEFeLJIPedSCBnjCdR598,1028
|
|
9
|
+
agentunit/assertions/behavior.py,sha256=a2bHrLAKDbjCZ-wFMkaJQQT36cbN3FuUWxnr9PzfSHM,7708
|
|
10
|
+
agentunit/assertions/fluent.py,sha256=CxjgdTTZUkdmT895vDeuEONb3sqzALULN8KRW_8XpEY,7969
|
|
11
|
+
agentunit/assertions/matchers.py,sha256=oahS5WL7unZIlvF1SfXePytCykNd51nSVEQj4HIOJS0,8717
|
|
12
|
+
agentunit/cli/__init__.py,sha256=8K6ZMmLxeYtHL5H9Tyl5EDp87R6Hcgi43Ikyth-5CTU,90
|
|
13
|
+
agentunit/cli/main.py,sha256=69kCeRouTue9gpM1bh5AVu5QOHpzoCgqry8dA_bfHo0,721
|
|
14
|
+
agentunit/cli/commands/__init__.py,sha256=C3Dh-Nk8xbxPf_yNdlmsApeOqWqHOsox3wCRm3kT7lk,91
|
|
15
|
+
agentunit/cli/commands/run.py,sha256=g9qaLo5-Rnr8koq4I2oFxofusIlbCU2i5lJurvmioXA,3078
|
|
16
|
+
agentunit/core/__init__.py,sha256=5rT3qGBsZH1HCOh2gHINxn-5ng_7iXN4IqFugXPcvLw,416
|
|
17
|
+
agentunit/core/collector.py,sha256=Si29fUOnTwIuo1NGG2khtoT3zinLpv4-xvbVp30CmC8,8750
|
|
18
|
+
agentunit/core/item.py,sha256=-mGJuQEhKLFcbS2zinfBG49HKoCfDVNdJOydV07t_7E,2453
|
|
19
|
+
agentunit/core/outcome.py,sha256=m6pMZfE1bYMxlGGdFQOYqlwl6nE8aAdsxxO12UpAveI,3816
|
|
20
|
+
agentunit/core/runner.py,sha256=OYwYxz7p0GX4VswqJ0nrHNO9-zbSN3b3iGKyUCvRppk,5797
|
|
21
|
+
agentunit/core/session.py,sha256=fHSI356YzdYX1BjCDvs2PhhJD752yeaWnuRxh1_iFTk,4261
|
|
22
|
+
agentunit/mocks/__init__.py,sha256=9_ntv4aYwYHvJnNJw4WxhuyRoBKnT63eMrzTYNgOvrk,198
|
|
23
|
+
agentunit/mocks/tool_mock.py,sha256=dUkeq1qgRtJNaHHTll6hILGXnlnkJlR1fpuhmKTBo3w,10136
|
|
24
|
+
agentunit/reporters/__init__.py,sha256=O4HLkliIBGD24YApL_-h6wjV7g6KSx51OTujvDy4qIo,136
|
|
25
|
+
agentunit/reporters/terminal.py,sha256=3uzaUOMkMuRtZFpz08TbK4ITJYKQCx0V17uAIXzr1gI,7223
|
|
26
|
+
agentunit/trace/__init__.py,sha256=vtIOGUOUqYfaW-OKhcD1JiSItEpeLOz_BPpVLnW1bDA,438
|
|
27
|
+
agentunit/trace/event.py,sha256=hKk9it2F6oJ47A8NMxSgazmVfT-kUBKxypaPD1ghAoU,3259
|
|
28
|
+
agentunit/trace/span.py,sha256=J5F7iqIMMLN_ZXGQp9vT0JSjidoeVA6x0KavBAgz5Rc,2362
|
|
29
|
+
agentunit/trace/tracer.py,sha256=cHAm4d0lIE5PJkjEWZCLPA6tcSycfEOlSgSauAZGfrQ,9724
|
|
30
|
+
agentunit/trace/tree.py,sha256=6bm0SroaYoGlb-5Y1cptZBTbscCEEl-IoYSPs7uNrhQ,5203
|
|
31
|
+
agentassert-0.1.0.dist-info/METADATA,sha256=N5AuGjBO455tDzP8ZC1_wG6oNa6sOQBzp-PZ1RoSizw,10542
|
|
32
|
+
agentassert-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
33
|
+
agentassert-0.1.0.dist-info/entry_points.txt,sha256=5GCpgYiuc0Wul7fKLxB1uwTrlXwHSkm-uMlkduGf2_Y,54
|
|
34
|
+
agentassert-0.1.0.dist-info/licenses/LICENSE,sha256=5Wo7S054PlecufN8LEvp6DaoR1-gS5BrDQ97d3hIE_E,1441
|
|
35
|
+
agentassert-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kaushik Dhola
|
|
4
|
+
|
|
5
|
+
All rights reserved.
|
|
6
|
+
|
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
in the Software without restriction, including without limitation the rights
|
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
furnished to do so, subject to the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
SOFTWARE.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
ATTRIBUTION NOTICE:
|
|
28
|
+
|
|
29
|
+
When using AgentUnit in your projects, please provide appropriate credit by:
|
|
30
|
+
1. Retaining this license file in any distribution
|
|
31
|
+
2. Acknowledging AgentUnit in your project documentation
|
|
32
|
+
3. Not misrepresenting the origin of this software
|
|
33
|
+
|
|
34
|
+
For commercial support or enterprise licensing inquiries, please contact the author.
|
agentunit/__init__.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AgentUnit — Behavioral testing framework for AI agents.
|
|
3
|
+
|
|
4
|
+
AgentUnit is the missing test runner for AI agents — framework-agnostic,
|
|
5
|
+
locally executable, and built for CI/CD pipelines.
|
|
6
|
+
|
|
7
|
+
Example usage:
|
|
8
|
+
import agentunit as au
|
|
9
|
+
from agentunit import agent_test, expect, mock_tool
|
|
10
|
+
|
|
11
|
+
@agent_test
|
|
12
|
+
def test_research_agent_calls_search(agent_harness):
|
|
13
|
+
search = mock_tool("web_search", returns={"results": ["AI news"]})
|
|
14
|
+
|
|
15
|
+
trace = agent_harness.run(
|
|
16
|
+
agent=my_agent,
|
|
17
|
+
input="Find AI news",
|
|
18
|
+
tools=[search]
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
expect(trace).tool("web_search").was_called()
|
|
22
|
+
expect(trace).completed_within_steps(10)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from agentunit._version import __version__, __version_info__
|
|
26
|
+
|
|
27
|
+
# Decorators
|
|
28
|
+
from agentunit.decorators import agent_test, fixture, scenario, scenarios
|
|
29
|
+
|
|
30
|
+
# Fluent assertions
|
|
31
|
+
from agentunit.assertions.fluent import expect
|
|
32
|
+
|
|
33
|
+
# Mocks
|
|
34
|
+
from agentunit.mocks.tool_mock import mock_tool, MockTool, MockToolset
|
|
35
|
+
|
|
36
|
+
# Matchers (for called_with assertions)
|
|
37
|
+
from agentunit.assertions.matchers import (
|
|
38
|
+
contains,
|
|
39
|
+
matches,
|
|
40
|
+
starts_with,
|
|
41
|
+
ends_with,
|
|
42
|
+
any_string,
|
|
43
|
+
greater_than,
|
|
44
|
+
less_than,
|
|
45
|
+
between,
|
|
46
|
+
has_key,
|
|
47
|
+
has_length,
|
|
48
|
+
contains_item,
|
|
49
|
+
is_type,
|
|
50
|
+
is_not_none,
|
|
51
|
+
is_none,
|
|
52
|
+
anything,
|
|
53
|
+
all_of,
|
|
54
|
+
any_of,
|
|
55
|
+
not_,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# Core types for type hints
|
|
59
|
+
from agentunit.trace.tracer import AgentTrace
|
|
60
|
+
from agentunit.core.outcome import Outcome, OutcomeStatus
|
|
61
|
+
from agentunit.fixtures import AgentHarness
|
|
62
|
+
|
|
63
|
+
__all__ = [
|
|
64
|
+
# Version
|
|
65
|
+
"__version__",
|
|
66
|
+
"__version_info__",
|
|
67
|
+
# Decorators
|
|
68
|
+
"agent_test",
|
|
69
|
+
"fixture",
|
|
70
|
+
"scenario",
|
|
71
|
+
"scenarios",
|
|
72
|
+
# Assertions
|
|
73
|
+
"expect",
|
|
74
|
+
# Mocks
|
|
75
|
+
"mock_tool",
|
|
76
|
+
"MockTool",
|
|
77
|
+
"MockToolset",
|
|
78
|
+
# Matchers
|
|
79
|
+
"contains",
|
|
80
|
+
"matches",
|
|
81
|
+
"starts_with",
|
|
82
|
+
"ends_with",
|
|
83
|
+
"any_string",
|
|
84
|
+
"greater_than",
|
|
85
|
+
"less_than",
|
|
86
|
+
"between",
|
|
87
|
+
"has_key",
|
|
88
|
+
"has_length",
|
|
89
|
+
"contains_item",
|
|
90
|
+
"is_type",
|
|
91
|
+
"is_not_none",
|
|
92
|
+
"is_none",
|
|
93
|
+
"anything",
|
|
94
|
+
"all_of",
|
|
95
|
+
"any_of",
|
|
96
|
+
"not_",
|
|
97
|
+
# Core types
|
|
98
|
+
"AgentTrace",
|
|
99
|
+
"AgentHarness",
|
|
100
|
+
"Outcome",
|
|
101
|
+
"OutcomeStatus",
|
|
102
|
+
]
|
agentunit/adapters/base.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Base adapter interface for framework integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from agentunit.trace.tracer import AgentTrace
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseAdapter(ABC):
|
|
12
|
+
"""
|
|
13
|
+
Base interface for framework adapters.
|
|
14
|
+
|
|
15
|
+
Adapters bridge AgentUnit's generic harness and tracer with framework-specific
|
|
16
|
+
execution models.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
@abstractmethod
|
|
20
|
+
def wrap_agent(self, agent: Any) -> Any:
|
|
21
|
+
"""Wrap an agent object with adapter-specific instrumentation."""
|
|
22
|
+
|
|
23
|
+
@abstractmethod
|
|
24
|
+
def run_agent(self, agent: Any, input: Any, **kwargs: Any) -> Any:
|
|
25
|
+
"""Execute the agent and return its output."""
|
|
26
|
+
|
|
27
|
+
@abstractmethod
|
|
28
|
+
def extract_trace(self) -> AgentTrace:
|
|
29
|
+
"""Extract and return the execution trace."""
|
|
30
|
+
|
|
31
|
+
@abstractmethod
|
|
32
|
+
def inject_mock_tools(self, agent: Any, tools: list[Any]) -> Any:
|
|
33
|
+
"""Replace real tools with mock tools on the given agent object."""
|