checkagent 0.0.1a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. checkagent-0.0.1a1/.github/workflows/ci.yml +54 -0
  2. checkagent-0.0.1a1/.github/workflows/publish.yml +49 -0
  3. checkagent-0.0.1a1/.gitignore +38 -0
  4. checkagent-0.0.1a1/CLAUDE.md +149 -0
  5. checkagent-0.0.1a1/CODE_OF_CONDUCT.md +27 -0
  6. checkagent-0.0.1a1/CONTRIBUTING.md +68 -0
  7. checkagent-0.0.1a1/LICENSE +190 -0
  8. checkagent-0.0.1a1/PKG-INFO +178 -0
  9. checkagent-0.0.1a1/README.md +127 -0
  10. checkagent-0.0.1a1/ROADMAP.md +100 -0
  11. checkagent-0.0.1a1/action/action.yml +158 -0
  12. checkagent-0.0.1a1/examples/case_study/agent.py +65 -0
  13. checkagent-0.0.1a1/examples/case_study/test_all_layers.py +272 -0
  14. checkagent-0.0.1a1/examples/case_study_multiturn/__init__.py +0 -0
  15. checkagent-0.0.1a1/examples/case_study_multiturn/agent.py +99 -0
  16. checkagent-0.0.1a1/examples/case_study_multiturn/test_all_layers.py +264 -0
  17. checkagent-0.0.1a1/examples/case_study_rag/agent.py +68 -0
  18. checkagent-0.0.1a1/examples/case_study_rag/test_all_layers.py +308 -0
  19. checkagent-0.0.1a1/pyproject.toml +92 -0
  20. checkagent-0.0.1a1/src/checkagent/__init__.py +94 -0
  21. checkagent-0.0.1a1/src/checkagent/adapters/__init__.py +32 -0
  22. checkagent-0.0.1a1/src/checkagent/adapters/anthropic.py +210 -0
  23. checkagent-0.0.1a1/src/checkagent/adapters/crewai.py +189 -0
  24. checkagent-0.0.1a1/src/checkagent/adapters/generic.py +109 -0
  25. checkagent-0.0.1a1/src/checkagent/adapters/langchain.py +241 -0
  26. checkagent-0.0.1a1/src/checkagent/adapters/openai_agents.py +221 -0
  27. checkagent-0.0.1a1/src/checkagent/adapters/pydantic_ai.py +188 -0
  28. checkagent-0.0.1a1/src/checkagent/ci/__init__.py +40 -0
  29. checkagent-0.0.1a1/src/checkagent/ci/entrypoint.py +100 -0
  30. checkagent-0.0.1a1/src/checkagent/ci/junit_xml.py +310 -0
  31. checkagent-0.0.1a1/src/checkagent/ci/quality_gate.py +169 -0
  32. checkagent-0.0.1a1/src/checkagent/ci/reporter.py +192 -0
  33. checkagent-0.0.1a1/src/checkagent/cli/__init__.py +28 -0
  34. checkagent-0.0.1a1/src/checkagent/cli/demo.py +185 -0
  35. checkagent-0.0.1a1/src/checkagent/cli/import_trace.py +200 -0
  36. checkagent-0.0.1a1/src/checkagent/cli/init.py +150 -0
  37. checkagent-0.0.1a1/src/checkagent/cli/migrate.py +66 -0
  38. checkagent-0.0.1a1/src/checkagent/cli/run.py +42 -0
  39. checkagent-0.0.1a1/src/checkagent/conversation/__init__.py +5 -0
  40. checkagent-0.0.1a1/src/checkagent/conversation/session.py +227 -0
  41. checkagent-0.0.1a1/src/checkagent/core/__init__.py +37 -0
  42. checkagent-0.0.1a1/src/checkagent/core/adapter.py +26 -0
  43. checkagent-0.0.1a1/src/checkagent/core/config.py +237 -0
  44. checkagent-0.0.1a1/src/checkagent/core/cost.py +338 -0
  45. checkagent-0.0.1a1/src/checkagent/core/plugin.py +266 -0
  46. checkagent-0.0.1a1/src/checkagent/core/types.py +145 -0
  47. checkagent-0.0.1a1/src/checkagent/datasets/__init__.py +12 -0
  48. checkagent-0.0.1a1/src/checkagent/datasets/loader.py +127 -0
  49. checkagent-0.0.1a1/src/checkagent/datasets/schema.py +75 -0
  50. checkagent-0.0.1a1/src/checkagent/eval/__init__.py +44 -0
  51. checkagent-0.0.1a1/src/checkagent/eval/aggregate.py +265 -0
  52. checkagent-0.0.1a1/src/checkagent/eval/assertions.py +343 -0
  53. checkagent-0.0.1a1/src/checkagent/eval/evaluator.py +123 -0
  54. checkagent-0.0.1a1/src/checkagent/eval/metrics.py +237 -0
  55. checkagent-0.0.1a1/src/checkagent/judge/__init__.py +34 -0
  56. checkagent-0.0.1a1/src/checkagent/judge/consensus.py +127 -0
  57. checkagent-0.0.1a1/src/checkagent/judge/judge.py +281 -0
  58. checkagent-0.0.1a1/src/checkagent/judge/types.py +157 -0
  59. checkagent-0.0.1a1/src/checkagent/judge/verdict.py +84 -0
  60. checkagent-0.0.1a1/src/checkagent/mock/__init__.py +73 -0
  61. checkagent-0.0.1a1/src/checkagent/mock/fault.py +605 -0
  62. checkagent-0.0.1a1/src/checkagent/mock/llm.py +582 -0
  63. checkagent-0.0.1a1/src/checkagent/mock/mcp.py +342 -0
  64. checkagent-0.0.1a1/src/checkagent/mock/tool.py +487 -0
  65. checkagent-0.0.1a1/src/checkagent/multiagent/__init__.py +25 -0
  66. checkagent-0.0.1a1/src/checkagent/multiagent/credit.py +229 -0
  67. checkagent-0.0.1a1/src/checkagent/multiagent/trace.py +235 -0
  68. checkagent-0.0.1a1/src/checkagent/replay/__init__.py +44 -0
  69. checkagent-0.0.1a1/src/checkagent/replay/cassette.py +196 -0
  70. checkagent-0.0.1a1/src/checkagent/replay/engine.py +167 -0
  71. checkagent-0.0.1a1/src/checkagent/replay/migration.py +211 -0
  72. checkagent-0.0.1a1/src/checkagent/replay/recorder.py +161 -0
  73. checkagent-0.0.1a1/src/checkagent/safety/__init__.py +70 -0
  74. checkagent-0.0.1a1/src/checkagent/safety/compliance.py +438 -0
  75. checkagent-0.0.1a1/src/checkagent/safety/conversation_scanner.py +138 -0
  76. checkagent-0.0.1a1/src/checkagent/safety/evaluator.py +71 -0
  77. checkagent-0.0.1a1/src/checkagent/safety/injection.py +194 -0
  78. checkagent-0.0.1a1/src/checkagent/safety/pii.py +133 -0
  79. checkagent-0.0.1a1/src/checkagent/safety/probes/__init__.py +26 -0
  80. checkagent-0.0.1a1/src/checkagent/safety/probes/base.py +98 -0
  81. checkagent-0.0.1a1/src/checkagent/safety/probes/injection.py +431 -0
  82. checkagent-0.0.1a1/src/checkagent/safety/probes/jailbreak.py +219 -0
  83. checkagent-0.0.1a1/src/checkagent/safety/probes/pii.py +126 -0
  84. checkagent-0.0.1a1/src/checkagent/safety/probes/scope.py +101 -0
  85. checkagent-0.0.1a1/src/checkagent/safety/refusal.py +141 -0
  86. checkagent-0.0.1a1/src/checkagent/safety/system_prompt.py +121 -0
  87. checkagent-0.0.1a1/src/checkagent/safety/taxonomy.py +65 -0
  88. checkagent-0.0.1a1/src/checkagent/safety/tool_boundary.py +180 -0
  89. checkagent-0.0.1a1/src/checkagent/streaming/__init__.py +7 -0
  90. checkagent-0.0.1a1/src/checkagent/streaming/collector.py +107 -0
  91. checkagent-0.0.1a1/src/checkagent/trace_import/__init__.py +22 -0
  92. checkagent-0.0.1a1/src/checkagent/trace_import/base.py +39 -0
  93. checkagent-0.0.1a1/src/checkagent/trace_import/json_importer.py +214 -0
  94. checkagent-0.0.1a1/src/checkagent/trace_import/otel_importer.py +239 -0
  95. checkagent-0.0.1a1/src/checkagent/trace_import/pii.py +123 -0
  96. checkagent-0.0.1a1/src/checkagent/trace_import/testcase_gen.py +151 -0
  97. checkagent-0.0.1a1/tests/__init__.py +0 -0
  98. checkagent-0.0.1a1/tests/adapters/__init__.py +0 -0
  99. checkagent-0.0.1a1/tests/adapters/test_anthropic.py +321 -0
  100. checkagent-0.0.1a1/tests/adapters/test_crewai.py +275 -0
  101. checkagent-0.0.1a1/tests/adapters/test_generic.py +151 -0
  102. checkagent-0.0.1a1/tests/adapters/test_langchain.py +420 -0
  103. checkagent-0.0.1a1/tests/adapters/test_openai_agents.py +318 -0
  104. checkagent-0.0.1a1/tests/adapters/test_pydantic_ai.py +334 -0
  105. checkagent-0.0.1a1/tests/ci/__init__.py +0 -0
  106. checkagent-0.0.1a1/tests/ci/test_entrypoint.py +100 -0
  107. checkagent-0.0.1a1/tests/ci/test_junit_xml.py +396 -0
  108. checkagent-0.0.1a1/tests/ci/test_quality_gate.py +163 -0
  109. checkagent-0.0.1a1/tests/ci/test_reporter.py +139 -0
  110. checkagent-0.0.1a1/tests/cli/__init__.py +0 -0
  111. checkagent-0.0.1a1/tests/cli/test_demo.py +37 -0
  112. checkagent-0.0.1a1/tests/cli/test_init.py +120 -0
  113. checkagent-0.0.1a1/tests/cli/test_main.py +30 -0
  114. checkagent-0.0.1a1/tests/cli/test_run.py +51 -0
  115. checkagent-0.0.1a1/tests/conversation/__init__.py +0 -0
  116. checkagent-0.0.1a1/tests/conversation/test_session.py +451 -0
  117. checkagent-0.0.1a1/tests/core/__init__.py +0 -0
  118. checkagent-0.0.1a1/tests/core/test_adapter.py +52 -0
  119. checkagent-0.0.1a1/tests/core/test_config.py +262 -0
  120. checkagent-0.0.1a1/tests/core/test_cost.py +363 -0
  121. checkagent-0.0.1a1/tests/core/test_plugin.py +225 -0
  122. checkagent-0.0.1a1/tests/core/test_types.py +175 -0
  123. checkagent-0.0.1a1/tests/datasets/__init__.py +0 -0
  124. checkagent-0.0.1a1/tests/datasets/test_loader.py +189 -0
  125. checkagent-0.0.1a1/tests/datasets/test_schema.py +137 -0
  126. checkagent-0.0.1a1/tests/eval/__init__.py +0 -0
  127. checkagent-0.0.1a1/tests/eval/test_aggregate.py +242 -0
  128. checkagent-0.0.1a1/tests/eval/test_assertions.py +437 -0
  129. checkagent-0.0.1a1/tests/eval/test_evaluator.py +151 -0
  130. checkagent-0.0.1a1/tests/eval/test_metrics.py +334 -0
  131. checkagent-0.0.1a1/tests/experiments/__init__.py +0 -0
  132. checkagent-0.0.1a1/tests/experiments/test_e060_fault_resilience.py +367 -0
  133. checkagent-0.0.1a1/tests/experiments/test_e061_multiturn_bugs.py +536 -0
  134. checkagent-0.0.1a1/tests/experiments/test_e062_rag_replay_regressions.py +413 -0
  135. checkagent-0.0.1a1/tests/experiments/test_e063_async_sync_overhead.py +274 -0
  136. checkagent-0.0.1a1/tests/experiments/test_e064_multiturn_safety.py +517 -0
  137. checkagent-0.0.1a1/tests/experiments/test_e066_conversation_scanner_detection.py +172 -0
  138. checkagent-0.0.1a1/tests/experiments/test_e068_usability_comparison.py +470 -0
  139. checkagent-0.0.1a1/tests/experiments/test_e069_owasp_coverage.py +260 -0
  140. checkagent-0.0.1a1/tests/judge/__init__.py +0 -0
  141. checkagent-0.0.1a1/tests/judge/test_consensus.py +253 -0
  142. checkagent-0.0.1a1/tests/judge/test_fixture.py +40 -0
  143. checkagent-0.0.1a1/tests/judge/test_judge.py +298 -0
  144. checkagent-0.0.1a1/tests/judge/test_judge_cost.py +311 -0
  145. checkagent-0.0.1a1/tests/judge/test_types.py +160 -0
  146. checkagent-0.0.1a1/tests/judge/test_verdict.py +132 -0
  147. checkagent-0.0.1a1/tests/judge/test_verdict_comparison.py +375 -0
  148. checkagent-0.0.1a1/tests/mock/__init__.py +0 -0
  149. checkagent-0.0.1a1/tests/mock/test_fault.py +662 -0
  150. checkagent-0.0.1a1/tests/mock/test_fault_integration.py +403 -0
  151. checkagent-0.0.1a1/tests/mock/test_literal_response.py +95 -0
  152. checkagent-0.0.1a1/tests/mock/test_llm.py +366 -0
  153. checkagent-0.0.1a1/tests/mock/test_llm_usage.py +186 -0
  154. checkagent-0.0.1a1/tests/mock/test_mcp.py +495 -0
  155. checkagent-0.0.1a1/tests/mock/test_tool.py +533 -0
  156. checkagent-0.0.1a1/tests/multiagent/__init__.py +0 -0
  157. checkagent-0.0.1a1/tests/multiagent/test_credit.py +304 -0
  158. checkagent-0.0.1a1/tests/multiagent/test_trace.py +540 -0
  159. checkagent-0.0.1a1/tests/replay/__init__.py +0 -0
  160. checkagent-0.0.1a1/tests/replay/test_cassette.py +287 -0
  161. checkagent-0.0.1a1/tests/replay/test_cassette_benchmark.py +195 -0
  162. checkagent-0.0.1a1/tests/replay/test_engine.py +284 -0
  163. checkagent-0.0.1a1/tests/replay/test_migration.py +370 -0
  164. checkagent-0.0.1a1/tests/replay/test_recorder.py +181 -0
  165. checkagent-0.0.1a1/tests/safety/__init__.py +0 -0
  166. checkagent-0.0.1a1/tests/safety/test_compliance.py +431 -0
  167. checkagent-0.0.1a1/tests/safety/test_conversation_scanner.py +328 -0
  168. checkagent-0.0.1a1/tests/safety/test_detection_rates.py +648 -0
  169. checkagent-0.0.1a1/tests/safety/test_fixture.py +37 -0
  170. checkagent-0.0.1a1/tests/safety/test_injection.py +98 -0
  171. checkagent-0.0.1a1/tests/safety/test_pii.py +135 -0
  172. checkagent-0.0.1a1/tests/safety/test_probes.py +247 -0
  173. checkagent-0.0.1a1/tests/safety/test_probes_jailbreak.py +104 -0
  174. checkagent-0.0.1a1/tests/safety/test_probes_pii.py +54 -0
  175. checkagent-0.0.1a1/tests/safety/test_probes_scope.py +79 -0
  176. checkagent-0.0.1a1/tests/safety/test_refusal.py +161 -0
  177. checkagent-0.0.1a1/tests/safety/test_system_prompt.py +104 -0
  178. checkagent-0.0.1a1/tests/safety/test_taxonomy.py +74 -0
  179. checkagent-0.0.1a1/tests/safety/test_tool_boundary.py +326 -0
  180. checkagent-0.0.1a1/tests/streaming/__init__.py +0 -0
  181. checkagent-0.0.1a1/tests/streaming/test_stream.py +357 -0
  182. checkagent-0.0.1a1/tests/test_cost_per_bug.py +475 -0
  183. checkagent-0.0.1a1/tests/test_framework_overhead.py +359 -0
  184. checkagent-0.0.1a1/tests/test_layer_effectiveness.py +824 -0
  185. checkagent-0.0.1a1/tests/test_readme_example.py +37 -0
  186. checkagent-0.0.1a1/tests/test_top_level_exports.py +89 -0
  187. checkagent-0.0.1a1/tests/trace_import/__init__.py +0 -0
  188. checkagent-0.0.1a1/tests/trace_import/test_cli_import.py +207 -0
  189. checkagent-0.0.1a1/tests/trace_import/test_json_importer.py +247 -0
  190. checkagent-0.0.1a1/tests/trace_import/test_otel_importer.py +261 -0
  191. checkagent-0.0.1a1/tests/trace_import/test_pii.py +114 -0
  192. checkagent-0.0.1a1/tests/trace_import/test_testcase_gen.py +175 -0
@@ -0,0 +1,54 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ test:
14
+ name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
15
+ runs-on: ${{ matrix.os }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ os: [ubuntu-latest, macos-latest, windows-latest]
20
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
21
+
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Install dependencies
31
+ run: |
32
+ python -m pip install --upgrade pip
33
+ pip install -e ".[dev]"
34
+
35
+ - name: Lint with ruff
36
+ run: ruff check src/ tests/
37
+
38
+ - name: Type check with mypy
39
+ run: mypy src/checkagent/ --ignore-missing-imports
40
+ continue-on-error: true
41
+
42
+ - name: Run tests
43
+ run: pytest tests/ -x --tb=short -q
44
+
45
+ - name: Run tests with coverage
46
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
47
+ run: pytest tests/ --cov=checkagent --cov-report=term-missing --cov-report=xml
48
+
49
+ - name: Upload coverage
50
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
51
+ uses: actions/upload-artifact@v4
52
+ with:
53
+ name: coverage-report
54
+ path: coverage.xml
@@ -0,0 +1,49 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ build:
12
+ name: Build distribution
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.12"
21
+
22
+ - name: Install build tools
23
+ run: pip install build
24
+
25
+ - name: Build package
26
+ run: python -m build
27
+
28
+ - name: Upload artifacts
29
+ uses: actions/upload-artifact@v4
30
+ with:
31
+ name: dist
32
+ path: dist/
33
+
34
+ publish:
35
+ name: Publish to PyPI
36
+ needs: build
37
+ runs-on: ubuntu-latest
38
+ environment: pypi
39
+ permissions:
40
+ id-token: write
41
+ steps:
42
+ - name: Download artifacts
43
+ uses: actions/download-artifact@v4
44
+ with:
45
+ name: dist
46
+ path: dist/
47
+
48
+ - name: Publish to PyPI
49
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ dist/
8
+ build/
9
+ .eggs/
10
+ *.egg
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+ *.swo
22
+
23
+ # OS
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # Testing
28
+ .pytest_cache/
29
+ .coverage
30
+ htmlcov/
31
+ .tox/
32
+
33
+ # Distribution
34
+ *.tar.gz
35
+ *.whl
36
+
37
+ # Cassettes may contain sensitive data — review before committing. Uncomment the next line to ignore all cassettes.
38
+ # tests/cassettes/
@@ -0,0 +1,149 @@
1
+ # CheckAgent
2
+
3
+ Open-source, pytest-native testing framework for AI agents.
4
+
5
+ ## What This Project Does
6
+
7
+ CheckAgent is a pytest plugin for testing AI agent workflows across four layers:
8
+
9
+ 1. **MOCK** — Deterministic unit tests with mocked LLMs and tools (free, milliseconds)
10
+ 2. **REPLAY** — Record-and-replay regression testing (cheap, seconds)
11
+ 3. **EVAL** — Metric evaluation against golden datasets (moderate, seconds)
12
+ 4. **JUDGE** — LLM-as-judge with statistical assertions (expensive, minutes)
13
+
14
+ ## Source Layout
15
+
16
+ ```
17
+ src/checkagent/
18
+ ├── core/ # Plugin, adapter protocol, types, tracing, cost, streaming
19
+ ├── mock/ # MockLLM, MockTool, MCP mock, fault injection, fixtures
20
+ ├── replay/ # Record/replay, versioned cassettes, stream cassettes
21
+ ├── eval/ # Metrics: task completion, tool correctness, trajectory, etc.
22
+ ├── safety/ # Attack probes, safety evaluators, compliance reports
23
+ ├── conversation/ # Multi-turn session management
24
+ ├── judge/ # LLM-as-judge, rubrics, statistical verdicts
25
+ ├── ci/ # GitHub Action, GitLab CI, quality gates, reporters
26
+ ├── adapters/ # LangChain, OpenAI Agents SDK, CrewAI, PydanticAI, Anthropic, generic
27
+ ├── datasets/ # Golden dataset loader, schema, generator
28
+ └── cli/ # init, run, demo, record, report, cost, migrate, import-trace
29
+ ```
30
+
31
+ ## Code Conventions
32
+
33
+ ### Async-First
34
+
35
+ Most agent frameworks are async-native. All agent-facing APIs are `async def`. The plugin sets `asyncio_mode = "auto"` so any `async def test_*` just works.
36
+
37
+ Sync agents are supported via the `@wrap` decorator, which auto-detects sync callables and runs them in a thread pool executor.
38
+
39
+ ```python
40
+ # Async test — the default
41
+ @pytest.mark.agent_test(layer="mock")
42
+ async def test_my_agent(my_agent, ap_mock_llm):
43
+ result = await my_agent.run("hello")
44
+ assert result.final_output is not None
45
+ ```
46
+
47
+ ### Fixture Naming
48
+
49
+ All fixtures use the `ap_` prefix to avoid conflicts with other pytest plugins:
50
+
51
+ - `ap_mock_llm` — mock LLM provider
52
+ - `ap_mock_tool` — mock tool executor
53
+ - `ap_fault` — fault injection
54
+ - `ap_conversation` — multi-turn conversation session
55
+ - `ap_stream_collector` — streaming event collector
56
+ - `ap_safety` — safety assertion helpers
57
+
58
+ ### Adapters
59
+
60
+ Adapters wrap agent frameworks to conform to the `AgentAdapter` protocol. Rules:
61
+ - Keep each adapter under 200 lines
62
+ - No deep framework integrations — thin wrappers only
63
+ - The `GenericAdapter` handles any Python callable as a fallback
64
+ - Adapters go in `src/checkagent/adapters/`
65
+
66
+ ### Types
67
+
68
+ Core data types live in `src/checkagent/core/types.py`:
69
+ - `AgentRun` — complete execution trace
70
+ - `Step` — single agent step
71
+ - `ToolCall` — tool invocation + result
72
+ - `AgentInput` — input to the agent
73
+ - `StreamEvent` — streaming chunk event
74
+ - `Score` — evaluation score
75
+
76
+ ### Cassettes
77
+
78
+ - Format: **JSON** (not YAML) — smaller diffs, fewer git merge conflicts
79
+ - Filenames: content-addressed (`cassettes/{test_id}/{short_hash}.json`)
80
+ - Include `_meta` block with schema version, timestamps, content hash
81
+ - API keys and secrets are redacted at recording time
82
+ - Versioned format with migration support (`checkagent migrate-cassettes`)
83
+
84
+ ### Configuration
85
+
86
+ All configuration lives in `checkagent.yml` at the project root and is validated with Pydantic. See the docs for the full schema reference.
87
+
88
+ ## Testing the Framework
89
+
90
+ ```bash
91
+ pytest tests/ # Run all framework tests
92
+ pytest tests/mock/ # Run mock layer tests only
93
+ pytest tests/ -x --tb=short # Stop on first failure
94
+ ```
95
+
96
+ - CI runs on Linux, macOS, and Windows
97
+ - Layer 1 (mock) tests must execute in < 100ms each
98
+ - Layer 2 (replay) tests must execute in < 1s each
99
+ - Zero flaky tests in Layers 1 and 2 — they are deterministic by design
100
+
101
+ ## CLI
102
+
103
+ ```bash
104
+ checkagent init # Scaffold new test project
105
+ checkagent run # Run tests (thin pytest wrapper)
106
+ checkagent demo # Zero-config demo, no API keys needed
107
+ checkagent record <agent> <input> # Record session as cassette
108
+ checkagent report <results> # Generate HTML report
109
+ checkagent cost <results> # Cost breakdown
110
+ checkagent migrate-cassettes [dir] # Upgrade cassette format
111
+ checkagent import-trace --source ... # Import production traces
112
+ checkagent dataset validate <file> # Validate golden dataset
113
+ ```
114
+
115
+ ## Dependencies
116
+
117
+ **Required:**
118
+ - `pytest` >= 7.0
119
+ - `pytest-asyncio` >= 0.23
120
+ - `pluggy` >= 1.0
121
+ - `pydantic` >= 2.0
122
+ - `click`
123
+ - `rich`
124
+
125
+ **Optional:**
126
+ - `opentelemetry-api` — trace emission
127
+ - `dirty-equals` — structured output fuzzy matching
128
+ - `deepdiff` — detailed failure diagnostics
129
+ - `spacy` — NER-based PII detection (for trace import)
130
+
131
+ ## Plugin System
132
+
133
+ Community extensions are separate PyPI packages that auto-register via entry points:
134
+
135
+ ```toml
136
+ # pyproject.toml for a community plugin
137
+ [project.entry-points."checkagent.adapters"]
138
+ my_framework = "checkagent_myframework:MyAdapter"
139
+ ```
140
+
141
+ Entry point groups: `checkagent.adapters`, `checkagent.evaluators`, `checkagent.safety`, `checkagent.judges`
142
+
143
+ ## Contributing
144
+
145
+ See CONTRIBUTING.md for guidelines. Key points:
146
+ - All PRs need tests
147
+ - Run `pytest tests/` locally before pushing
148
+ - Adapters belong in `src/checkagent/adapters/` (core) or as separate packages (community)
149
+ - Safety probes must be non-destructive
@@ -0,0 +1,27 @@
1
+ # Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.
6
+
7
+ ## Our Standards
8
+
9
+ **Positive behavior:**
10
+ - Being respectful of differing viewpoints and experiences
11
+ - Giving and gracefully accepting constructive feedback
12
+ - Focusing on what is best for the community
13
+ - Showing empathy towards other community members
14
+
15
+ **Unacceptable behavior:**
16
+ - Trolling, insulting/derogatory comments, and personal or political attacks
17
+ - Public or private harassment
18
+ - Publishing others' private information without explicit permission
19
+ - Other conduct which could reasonably be considered inappropriate in a professional setting
20
+
21
+ ## Enforcement
22
+
23
+ Project maintainers are responsible for clarifying and enforcing standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
24
+
25
+ ## Attribution
26
+
27
+ This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
@@ -0,0 +1,68 @@
1
+ # Contributing to CheckAgent
2
+
3
+ Thanks for your interest in CheckAgent! We're building this in public and welcome contributions from day one.
4
+
5
+ ## Getting Started
6
+
7
+ ```bash
8
+ git clone https://github.com/xydac/checkagent.git
9
+ cd checkagent
10
+ python -m venv .venv
11
+ source .venv/bin/activate
12
+ pip install -e ".[dev]"
13
+ pytest tests/
14
+ ```
15
+
16
+ ## Development Process
17
+
18
+ 1. Check existing issues or open a new one to discuss your idea
19
+ 2. Fork the repo and create a branch from `main`
20
+ 3. Write your code and tests
21
+ 4. Run `pytest tests/` and make sure everything passes
22
+ 5. Submit a PR with a clear description of what changed and why
23
+
24
+ ## Code Conventions
25
+
26
+ - **Async-first** — all agent-facing APIs use `async def`
27
+ - **Fixtures** use `ap_` prefix (e.g., `ap_mock_llm`, `ap_fault`)
28
+ - **Adapters** stay under 200 lines — thin wrappers, not deep integrations
29
+ - **Types** go in `src/checkagent/core/types.py`
30
+ - **Tests** mirror the source layout in `tests/`
31
+
32
+ ## Testing
33
+
34
+ - Layer 1 (mock) tests must run in < 100ms
35
+ - Layer 2 (replay) tests must run in < 1s
36
+ - Zero flaky tests in Layers 1 and 2
37
+ - Run `pytest tests/` before submitting a PR
38
+
39
+ ## Commit Messages
40
+
41
+ Use clear, imperative commit messages:
42
+ - `Add mock LLM streaming support`
43
+ - `Fix cassette replay matching for tool calls`
44
+ - `Update safety probe templates for indirect injection`
45
+
46
+ ## DCO Sign-Off
47
+
48
+ We use the Developer Certificate of Origin (DCO). Sign off your commits:
49
+
50
+ ```bash
51
+ git commit -s -m "Your commit message"
52
+ ```
53
+
54
+ This adds `Signed-off-by: Your Name <your@email.com>` to the commit.
55
+
56
+ ## What to Contribute
57
+
58
+ - **Bug reports** — open an issue with reproduction steps
59
+ - **Bug fixes** — PRs welcome, include a test that catches the bug
60
+ - **New evaluators** — custom metrics via the plugin interface
61
+ - **New adapters** — framework integrations (prefer as separate packages for community adapters)
62
+ - **Safety probes** — new attack templates (must be non-destructive)
63
+ - **Documentation** — guides, examples, typo fixes
64
+ - **Examples** — real-world agent test suites
65
+
66
+ ## License
67
+
68
+ By contributing, you agree that your contributions will be licensed under Apache-2.0.
@@ -0,0 +1,190 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by the Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ Copyright 2026 CheckAgent Contributors
179
+
180
+ Licensed under the Apache License, Version 2.0 (the "License");
181
+ you may not use this file except in compliance with the License.
182
+ You may obtain a copy of the License at
183
+
184
+ http://www.apache.org/licenses/LICENSE-2.0
185
+
186
+ Unless required by applicable law or agreed to in writing, software
187
+ distributed under the License is distributed on an "AS IS" BASIS,
188
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
+ See the License for the specific language governing permissions and
190
+ limitations under the License.