pytest-llm-assert 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytest_llm_assert-0.1.0/.env.example +27 -0
- pytest_llm_assert-0.1.0/.github/workflows/ci.yml +105 -0
- pytest_llm_assert-0.1.0/.github/workflows/release.yml +110 -0
- pytest_llm_assert-0.1.0/.gitignore +62 -0
- pytest_llm_assert-0.1.0/LICENSE +21 -0
- pytest_llm_assert-0.1.0/PKG-INFO +246 -0
- pytest_llm_assert-0.1.0/README.md +216 -0
- pytest_llm_assert-0.1.0/examples/README.md +65 -0
- pytest_llm_assert-0.1.0/examples/pytest_conftest.py +126 -0
- pytest_llm_assert-0.1.0/examples/test_basic.py +69 -0
- pytest_llm_assert-0.1.0/examples/test_compare_models.py +78 -0
- pytest_llm_assert-0.1.0/pyproject.toml +72 -0
- pytest_llm_assert-0.1.0/src/pytest_llm_assert/__init__.py +6 -0
- pytest_llm_assert-0.1.0/src/pytest_llm_assert/core.py +186 -0
- pytest_llm_assert-0.1.0/src/pytest_llm_assert/plugin.py +43 -0
- pytest_llm_assert-0.1.0/tests/__init__.py +0 -0
- pytest_llm_assert-0.1.0/tests/integration/__init__.py +0 -0
- pytest_llm_assert-0.1.0/tests/integration/test_llm_integration.py +222 -0
- pytest_llm_assert-0.1.0/tests/unit/__init__.py +0 -0
- pytest_llm_assert-0.1.0/tests/unit/test_llm_assert.py +214 -0
- pytest_llm_assert-0.1.0/uv.lock +2035 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# pytest-llm-assert environment configuration
|
|
2
|
+
# Copy this file to .env and fill in your values
|
|
3
|
+
|
|
4
|
+
# =============================================================================
|
|
5
|
+
# Azure OpenAI (with Entra ID / Azure AD)
|
|
6
|
+
# =============================================================================
|
|
7
|
+
# pip install pytest-llm-assert[azure]
|
|
8
|
+
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
|
|
9
|
+
AZURE_OPENAI_DEPLOYMENT=gpt-5-mini
|
|
10
|
+
|
|
11
|
+
# =============================================================================
|
|
12
|
+
# Google Vertex AI
|
|
13
|
+
# =============================================================================
|
|
14
|
+
# Run: gcloud auth application-default login
|
|
15
|
+
GCP_PROJECT_ID=your-project-id
|
|
16
|
+
GCP_LOCATION=us-central1
|
|
17
|
+
VERTEX_MODEL=gemini-2.0-flash
|
|
18
|
+
|
|
19
|
+
# =============================================================================
|
|
20
|
+
# OpenAI (direct)
|
|
21
|
+
# =============================================================================
|
|
22
|
+
# OPENAI_API_KEY=sk-...
|
|
23
|
+
|
|
24
|
+
# =============================================================================
|
|
25
|
+
# Anthropic
|
|
26
|
+
# =============================================================================
|
|
27
|
+
# ANTHROPIC_API_KEY=sk-ant-...
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
lint:
|
|
12
|
+
name: Lint
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v4
|
|
19
|
+
with:
|
|
20
|
+
version: "latest"
|
|
21
|
+
|
|
22
|
+
- name: Set up Python
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: "3.13"
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --all-extras
|
|
29
|
+
|
|
30
|
+
- name: Run ruff linter
|
|
31
|
+
run: uv run ruff check .
|
|
32
|
+
|
|
33
|
+
- name: Run ruff formatter check
|
|
34
|
+
run: uv run ruff format --check .
|
|
35
|
+
|
|
36
|
+
type-check:
|
|
37
|
+
name: Type Check
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
|
|
42
|
+
- name: Install uv
|
|
43
|
+
uses: astral-sh/setup-uv@v4
|
|
44
|
+
with:
|
|
45
|
+
version: "latest"
|
|
46
|
+
|
|
47
|
+
- name: Set up Python
|
|
48
|
+
uses: actions/setup-python@v5
|
|
49
|
+
with:
|
|
50
|
+
python-version: "3.13"
|
|
51
|
+
|
|
52
|
+
- name: Install dependencies
|
|
53
|
+
run: uv sync --all-extras
|
|
54
|
+
|
|
55
|
+
- name: Run pyright
|
|
56
|
+
run: uv run pyright src/pytest_llm_assert/
|
|
57
|
+
|
|
58
|
+
test:
|
|
59
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
60
|
+
runs-on: ubuntu-latest
|
|
61
|
+
strategy:
|
|
62
|
+
fail-fast: false
|
|
63
|
+
matrix:
|
|
64
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
65
|
+
steps:
|
|
66
|
+
- uses: actions/checkout@v4
|
|
67
|
+
|
|
68
|
+
- name: Install uv
|
|
69
|
+
uses: astral-sh/setup-uv@v4
|
|
70
|
+
with:
|
|
71
|
+
version: "latest"
|
|
72
|
+
|
|
73
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
74
|
+
uses: actions/setup-python@v5
|
|
75
|
+
with:
|
|
76
|
+
python-version: ${{ matrix.python-version }}
|
|
77
|
+
|
|
78
|
+
- name: Install dependencies
|
|
79
|
+
run: uv sync --all-extras
|
|
80
|
+
|
|
81
|
+
- name: Run unit tests
|
|
82
|
+
run: uv run pytest tests/unit/ -v --tb=short
|
|
83
|
+
|
|
84
|
+
test-smoke:
|
|
85
|
+
name: Smoke Test
|
|
86
|
+
runs-on: ubuntu-latest
|
|
87
|
+
steps:
|
|
88
|
+
- uses: actions/checkout@v4
|
|
89
|
+
|
|
90
|
+
- name: Install uv
|
|
91
|
+
uses: astral-sh/setup-uv@v4
|
|
92
|
+
with:
|
|
93
|
+
version: "latest"
|
|
94
|
+
|
|
95
|
+
- name: Set up Python
|
|
96
|
+
uses: actions/setup-python@v5
|
|
97
|
+
with:
|
|
98
|
+
python-version: "3.13"
|
|
99
|
+
|
|
100
|
+
- name: Install dependencies
|
|
101
|
+
run: uv sync --all-extras
|
|
102
|
+
|
|
103
|
+
- name: Verify package imports
|
|
104
|
+
run: |
|
|
105
|
+
uv run python -c "from pytest_llm_assert import LLMAssert; print('Import successful')"
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v[0-9]*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
name: Build Package
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Install uv
|
|
16
|
+
uses: astral-sh/setup-uv@v4
|
|
17
|
+
with:
|
|
18
|
+
version: "latest"
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.13"
|
|
24
|
+
|
|
25
|
+
- name: Extract version from tag
|
|
26
|
+
id: get_version
|
|
27
|
+
run: echo "VERSION=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
|
|
28
|
+
|
|
29
|
+
- name: Update version in pyproject.toml
|
|
30
|
+
run: |
|
|
31
|
+
sed -i 's/^version = ".*"/version = "${{ steps.get_version.outputs.VERSION }}"/' pyproject.toml
|
|
32
|
+
echo "Updated version to ${{ steps.get_version.outputs.VERSION }}"
|
|
33
|
+
grep "^version" pyproject.toml
|
|
34
|
+
|
|
35
|
+
- name: Install build dependencies
|
|
36
|
+
run: uv sync --dev
|
|
37
|
+
|
|
38
|
+
- name: Build package
|
|
39
|
+
run: uv build
|
|
40
|
+
|
|
41
|
+
- name: Upload build artifacts
|
|
42
|
+
uses: actions/upload-artifact@v4
|
|
43
|
+
with:
|
|
44
|
+
name: dist
|
|
45
|
+
path: dist/
|
|
46
|
+
|
|
47
|
+
test:
|
|
48
|
+
name: Test Build
|
|
49
|
+
runs-on: ubuntu-latest
|
|
50
|
+
needs: build
|
|
51
|
+
steps:
|
|
52
|
+
- name: Download build artifacts
|
|
53
|
+
uses: actions/download-artifact@v4
|
|
54
|
+
with:
|
|
55
|
+
name: dist
|
|
56
|
+
path: dist/
|
|
57
|
+
|
|
58
|
+
- name: Set up Python
|
|
59
|
+
uses: actions/setup-python@v5
|
|
60
|
+
with:
|
|
61
|
+
python-version: "3.13"
|
|
62
|
+
|
|
63
|
+
- name: Install package from wheel
|
|
64
|
+
run: pip install dist/*.whl
|
|
65
|
+
|
|
66
|
+
- name: Verify installation
|
|
67
|
+
run: |
|
|
68
|
+
python -c "from pytest_llm_assert import LLMAssert; print('Import successful')"
|
|
69
|
+
|
|
70
|
+
publish-pypi:
|
|
71
|
+
name: Publish to PyPI
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
needs: [build, test]
|
|
74
|
+
environment:
|
|
75
|
+
name: pypi
|
|
76
|
+
url: https://pypi.org/project/pytest-llm-assert/
|
|
77
|
+
permissions:
|
|
78
|
+
id-token: write
|
|
79
|
+
steps:
|
|
80
|
+
- name: Download build artifacts
|
|
81
|
+
uses: actions/download-artifact@v4
|
|
82
|
+
with:
|
|
83
|
+
name: dist
|
|
84
|
+
path: dist/
|
|
85
|
+
|
|
86
|
+
- name: Publish to PyPI
|
|
87
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
88
|
+
|
|
89
|
+
github-release:
|
|
90
|
+
name: Create GitHub Release
|
|
91
|
+
runs-on: ubuntu-latest
|
|
92
|
+
needs: [publish-pypi]
|
|
93
|
+
permissions:
|
|
94
|
+
contents: write
|
|
95
|
+
steps:
|
|
96
|
+
- uses: actions/checkout@v4
|
|
97
|
+
|
|
98
|
+
- name: Download build artifacts
|
|
99
|
+
uses: actions/download-artifact@v4
|
|
100
|
+
with:
|
|
101
|
+
name: dist
|
|
102
|
+
path: dist/
|
|
103
|
+
|
|
104
|
+
- name: Create GitHub Release
|
|
105
|
+
uses: softprops/action-gh-release@v2
|
|
106
|
+
with:
|
|
107
|
+
files: dist/*
|
|
108
|
+
generate_release_notes: true
|
|
109
|
+
draft: false
|
|
110
|
+
prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
*.egg
|
|
21
|
+
*.manifest
|
|
22
|
+
*.spec
|
|
23
|
+
|
|
24
|
+
# Virtual environments
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
ENV/
|
|
28
|
+
env/
|
|
29
|
+
|
|
30
|
+
# IDE
|
|
31
|
+
.idea/
|
|
32
|
+
.vscode/
|
|
33
|
+
*.swp
|
|
34
|
+
*.swo
|
|
35
|
+
*~
|
|
36
|
+
.project
|
|
37
|
+
.pydevproject
|
|
38
|
+
.settings/
|
|
39
|
+
|
|
40
|
+
# Testing
|
|
41
|
+
.pytest_cache/
|
|
42
|
+
.coverage
|
|
43
|
+
.coverage.*
|
|
44
|
+
htmlcov/
|
|
45
|
+
.tox/
|
|
46
|
+
.nox/
|
|
47
|
+
.hypothesis/
|
|
48
|
+
.ruff_cache/
|
|
49
|
+
.mypy_cache/
|
|
50
|
+
|
|
51
|
+
# Environment
|
|
52
|
+
.env
|
|
53
|
+
.env.local
|
|
54
|
+
.env.*.local
|
|
55
|
+
|
|
56
|
+
# OS
|
|
57
|
+
.DS_Store
|
|
58
|
+
Thumbs.db
|
|
59
|
+
*.log
|
|
60
|
+
|
|
61
|
+
# Jupyter
|
|
62
|
+
.ipynb_checkpoints/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Stefan Broenner
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pytest-llm-assert
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Simple LLM-powered assertions for any pytest test
|
|
5
|
+
Project-URL: Homepage, https://github.com/sbroenne/pytest-llm-assert
|
|
6
|
+
Project-URL: Documentation, https://github.com/sbroenne/pytest-llm-assert#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/sbroenne/pytest-llm-assert
|
|
8
|
+
Author: Stefan Broenner
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,assertions,llm,pytest,testing
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Framework :: Pytest
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: azure-identity>=1.15
|
|
22
|
+
Requires-Dist: litellm>=1.55
|
|
23
|
+
Requires-Dist: pytest>=8.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pyright>=1.1; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: python-dotenv>=1.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# pytest-llm-assert
|
|
32
|
+
|
|
33
|
+
**Natural language assertions for pytest.**
|
|
34
|
+
|
|
35
|
+
A pytest plugin that lets you write semantic assertions using LLMs. Stop writing brittle string checks — let an LLM understand what you actually mean.
|
|
36
|
+
|
|
37
|
+
## The Problem
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
# ❌ These all fail even though they mean "success":
|
|
41
|
+
assert "success" in response  # Fails on "Succeeded", "SUCCESS", "It worked!"
|
|
42
|
+
assert response == "Operation completed successfully" # Exact match? Really?
|
|
43
|
+
assert re.match(r"success|succeeded|worked", response, re.I) # Regex hell
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
# You're testing a text-to-SQL agent. How do you validate the output?
|
|
48
|
+
|
|
49
|
+
# ❌ Exact match? There are many valid ways to write the same query:
|
|
50
|
+
assert sql == "SELECT name FROM users WHERE age > 21"
|
|
51
|
+
|
|
52
|
+
# ❌ Regex? Good luck covering all valid SQL syntax:
|
|
53
|
+
assert re.match(r"SELECT\s+name\s+FROM\s+users", sql, re.I)
|
|
54
|
+
|
|
55
|
+
# ❌ Parse it? Now you need a SQL parser as a test dependency:
|
|
56
|
+
assert sqlparse.parse(sql)[0].get_type() == "SELECT"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## The Solution
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
# ✅ Just say what you mean:
|
|
63
|
+
assert llm(response, "Does this indicate the operation succeeded?")
|
|
64
|
+
assert llm(sql, "Is this a valid SELECT query that returns user names for users over 21?")
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Why This Works
|
|
68
|
+
|
|
69
|
+
The LLM evaluates your criterion against the content and returns a judgment. It understands:
|
|
70
|
+
|
|
71
|
+
- **Synonyms**: "success", "succeeded", "worked", "completed" all mean the same thing
|
|
72
|
+
- **Semantics**: Two SQL queries can be equivalent even with different syntax
|
|
73
|
+
- **Context**: "The operation failed successfully" is actually a failure
|
|
74
|
+
- **Intent**: Generated code can be correct even if it's not identical to a reference
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
## Installation
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install pytest-llm-assert
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Setup
|
|
84
|
+
|
|
85
|
+
This library uses [LiteLLM](https://docs.litellm.ai/) under the hood, giving you access to **100+ LLM providers** with a unified API.
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# OpenAI
|
|
89
|
+
export OPENAI_API_KEY=sk-...
|
|
90
|
+
|
|
91
|
+
# Azure OpenAI with Entra ID (no API keys)
|
|
92
|
+
export AZURE_API_BASE=https://your-resource.openai.azure.com
|
|
93
|
+
export AZURE_API_VERSION=2024-02-15-preview
|
|
94
|
+
# Uses DefaultAzureCredential: az login, managed identity, etc.
|
|
95
|
+
|
|
96
|
+
# Ollama (local)
|
|
97
|
+
# Just run: ollama serve
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
See [LiteLLM docs](https://docs.litellm.ai/docs/providers) for all providers including Vertex AI, Bedrock, Anthropic, and more.
|
|
101
|
+
|
|
102
|
+
## Quick Start
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from pytest_llm_assert import LLMAssert
|
|
106
|
+
|
|
107
|
+
llm = LLMAssert(model="openai/gpt-5-mini") # Uses OPENAI_API_KEY from env
|
|
108
|
+
|
|
109
|
+
# Semantic assertions - returns True/False
|
|
110
|
+
assert llm("Operation completed successfully", "Does this indicate success?")
|
|
111
|
+
assert llm("Error: connection refused", "Does this indicate a failure?")
|
|
112
|
+
assert not llm("All tests passed", "Does this indicate a failure?")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Real Examples
|
|
116
|
+
|
|
117
|
+
First, create a fixture in `conftest.py`:
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
# conftest.py
|
|
121
|
+
import pytest
|
|
122
|
+
from pytest_llm_assert import LLMAssert
|
|
123
|
+
|
|
124
|
+
@pytest.fixture
|
|
125
|
+
def llm():
|
|
126
|
+
return LLMAssert(model="openai/gpt-5-mini")
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Then use it in your tests:
|
|
130
|
+
|
|
131
|
+
### Testing Error Messages
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
def test_validation_error_is_helpful(llm):
|
|
135
|
+
"""Error messages should explain the problem clearly."""
|
|
136
|
+
error_msg = "ValidationError: 'port' must be an integer, got 'not-a-number'"
|
|
137
|
+
|
|
138
|
+
assert llm(error_msg, "Does this explain that port must be a number?")
|
|
139
|
+
assert llm(error_msg, "Does this indicate which field failed validation?")
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Testing Generated SQL
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
def test_query_builder_generates_valid_sql(llm):
|
|
146
|
+
"""Query builder should produce semantically correct SQL."""
|
|
147
|
+
query = "SELECT name FROM users WHERE age > 21 ORDER BY name"
|
|
148
|
+
|
|
149
|
+
assert llm(query, "Is this a valid SELECT query that returns names of users over 21?")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Testing LLM Output
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
def test_summary_is_comprehensive(llm):
|
|
156
|
+
"""Generated summaries should capture key points."""
|
|
157
|
+
summary = "The contract establishes a 2-year service agreement between..."
|
|
158
|
+
|
|
159
|
+
assert llm(summary, "Does this summarize a legal contract?")
|
|
160
|
+
assert llm(summary, "Does this mention the contract duration?")
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Comparing Judge Models
|
|
164
|
+
|
|
165
|
+
Not sure which LLM to use as your assertion judge? Run the same tests against multiple models to find the best one for your use case:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
import pytest
|
|
169
|
+
from pytest_llm_assert import LLMAssert
|
|
170
|
+
|
|
171
|
+
MODELS = ["openai/gpt-5-mini", "anthropic/claude-sonnet-4-20250514", "ollama/llama3.1:8b"]
|
|
172
|
+
|
|
173
|
+
@pytest.fixture(params=MODELS)
|
|
174
|
+
def llm(request):
|
|
175
|
+
return LLMAssert(model=request.param)
|
|
176
|
+
|
|
177
|
+
def test_validates_sql_equivalence(llm):
|
|
178
|
+
"""Test which models can judge SQL semantic equivalence."""
|
|
179
|
+
sql = "SELECT u.name FROM users AS u WHERE u.age >= 22"
|
|
180
|
+
assert llm(sql, "Is this equivalent to selecting names of users over 21?")
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Output shows which judge models correctly evaluate your criterion:
|
|
184
|
+
```
|
|
185
|
+
test_validates_sql_equivalence[openai/gpt-5-mini] PASSED
|
|
186
|
+
test_validates_sql_equivalence[anthropic/claude-sonnet-4-20250514] PASSED
|
|
187
|
+
test_validates_sql_equivalence[ollama/llama3.1:8b] FAILED
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
> **Note:** This tests which LLM makes a good *judge* for your assertions. To test AI agents themselves (e.g., "does my coding agent produce working code?"), see [pytest-aitest](https://github.com/sbroenne/pytest-aitest).
|
|
191
|
+
|
|
192
|
+
## Configuration
|
|
193
|
+
|
|
194
|
+
### Programmatic
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from pytest_llm_assert import LLMAssert
|
|
198
|
+
|
|
199
|
+
llm = LLMAssert(
|
|
200
|
+
model="openai/gpt-5-mini",
|
|
201
|
+
api_key="sk-...", # Or use env var
|
|
202
|
+
api_base="https://...", # Custom endpoint
|
|
203
|
+
)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### CLI Options
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
pytest --llm-model=openai/gpt-5-mini
|
|
210
|
+
pytest --llm-api-key='${OPENAI_API_KEY}' # Env var expansion
|
|
211
|
+
pytest --llm-api-base=http://localhost:8080
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Environment Variables
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
export OPENAI_API_KEY=sk-...
|
|
218
|
+
export LLM_MODEL=openai/gpt-5-mini
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## API Reference
|
|
222
|
+
|
|
223
|
+
### `LLMAssert(model, api_key=None, api_base=None, **kwargs)`
|
|
224
|
+
|
|
225
|
+
Create an LLM assertion helper.
|
|
226
|
+
|
|
227
|
+
- `model`: LiteLLM model string (e.g., `"openai/gpt-5-mini"`, `"azure/gpt-4o"`)
|
|
228
|
+
- `api_key`: Optional API key (or use environment variables)
|
|
229
|
+
- `api_base`: Optional custom endpoint
|
|
230
|
+
- `**kwargs`: Additional parameters passed to LiteLLM
|
|
231
|
+
|
|
232
|
+
### `llm(content, criterion) -> AssertionResult`
|
|
233
|
+
|
|
234
|
+
Evaluate if content meets the criterion.
|
|
235
|
+
|
|
236
|
+
- Returns `AssertionResult` which is truthy if criterion is met
|
|
237
|
+
- Access `.reasoning` for the LLM's explanation
|
|
238
|
+
|
|
239
|
+
## See Also
|
|
240
|
+
|
|
241
|
+
- **[Examples](examples/)** — Example pytest tests showing basic usage, model comparison, and fixture patterns
|
|
242
|
+
- **[pytest-aitest](https://github.com/sbroenne/pytest-aitest)** — Full framework for testing MCP servers, CLIs, and AI agents. Uses pytest-llm-assert for the judge.
|
|
243
|
+
|
|
244
|
+
## License
|
|
245
|
+
|
|
246
|
+
MIT
|