pytest_llm_assert-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytest_llm_assert/__init__.py +6 -0
- pytest_llm_assert/core.py +186 -0
- pytest_llm_assert/plugin.py +43 -0
- pytest_llm_assert-0.1.0.dist-info/METADATA +246 -0
- pytest_llm_assert-0.1.0.dist-info/RECORD +8 -0
- pytest_llm_assert-0.1.0.dist-info/WHEEL +4 -0
- pytest_llm_assert-0.1.0.dist-info/entry_points.txt +2 -0
- pytest_llm_assert-0.1.0.dist-info/licenses/LICENSE +21 -0

pytest_llm_assert/core.py
@@ -0,0 +1,186 @@
"""Core LLM assertion implementation."""

from __future__ import annotations

import json
import os
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING, Callable

import litellm

if TYPE_CHECKING:
    from typing import Any


@dataclass(slots=True)
class AssertionResult:
    """Result of an LLM assertion with rich repr for pytest."""

    passed: bool
    criterion: str
    reasoning: str
    content_preview: str

    def __bool__(self) -> bool:
        return self.passed

    def __repr__(self) -> str:
        status = "PASS" if self.passed else "FAIL"
        return (
            f"LLMAssert({status}: {self.criterion!r})\n"
            f" Content: {self.content_preview!r}\n"
            f" Reasoning: {self.reasoning}"
        )


class LLMAssert:
    """LLM-powered assertions for semantic evaluation.

    Example:
        >>> llm = LLMAssert(model="openai/gpt-5-mini")
        >>> assert llm("Hello world", "Is this a greeting?")

    For Azure OpenAI with Entra ID, just use `az login` - no API key needed:
        >>> llm = LLMAssert(model="azure/gpt-4o", api_base="https://your-resource.openai.azure.com")
    """

    def __init__(
        self,
        model: str = "openai/gpt-5-mini",
        api_key: str | None = None,
        api_base: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize LLM assertion helper.

        Args:
            model: LiteLLM model string (e.g., "openai/gpt-5-mini", "azure/gpt-4o")
            api_key: API key (supports ${ENV_VAR} expansion). For Azure, leave empty to use Entra ID.
            api_base: Custom API base URL (required for Azure)
            **kwargs: Additional parameters passed to LiteLLM
        """
        self.model = model
        self.api_key = self._expand_env(api_key) if api_key else None
        self.api_base = api_base
        self.kwargs = kwargs
        self._azure_ad_token_provider: Callable[[], str] | None = None

        # Auto-configure Azure Entra ID when no API key is provided
        if self._is_azure_model() and not self._has_azure_api_key():
            self._azure_ad_token_provider = self._get_azure_ad_token_provider()

    def _is_azure_model(self) -> bool:
        """Check if the model is an Azure OpenAI model."""
        return self.model.startswith("azure/")

    def _has_azure_api_key(self) -> bool:
        """Check if an Azure API key is available."""
        return bool(self.api_key or os.environ.get("AZURE_API_KEY"))

    @staticmethod
    def _get_azure_ad_token_provider() -> Callable[[], str] | None:
        """Get Azure AD token provider for Entra ID authentication.

        Uses LiteLLM's built-in helper which leverages DefaultAzureCredential:
        - Azure CLI credentials (az login)
        - Managed Identity
        - Environment variables (AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID)
        - Visual Studio Code credentials
        """
        try:
            from litellm.secret_managers.get_azure_ad_token_provider import (
                get_azure_ad_token_provider,
            )

            return get_azure_ad_token_provider()
        except ImportError:
            # azure-identity not installed
            return None
        except Exception:
            # Credential not available
            return None

    @staticmethod
    def _expand_env(value: str) -> str:
        """Expand ${VAR} patterns in string."""
        pattern = r"\$\{([^}]+)\}"
        return re.sub(pattern, lambda m: os.environ.get(m.group(1), m.group(0)), value)

    @staticmethod
    def _truncate(text: str, max_len: int = 100) -> str:
        """Truncate text for display."""
        if len(text) <= max_len:
            return text
        return text[: max_len - 3] + "..."

    def _call_llm(self, messages: list[dict[str, str]]) -> str:
        """Call the LLM and return response content."""
        kwargs = {**self.kwargs}

        # Use Azure AD token provider if configured (Entra ID auth)
        if self._azure_ad_token_provider is not None:
            kwargs["azure_ad_token_provider"] = self._azure_ad_token_provider

        response = litellm.completion(
            model=self.model,
            messages=messages,
            api_key=self.api_key,
            api_base=self.api_base,
            **kwargs,
        )
        return response.choices[0].message.content or ""  # type: ignore[union-attr]

    def __call__(self, content: str, criterion: str) -> AssertionResult:
        """Evaluate if content meets the given criterion.

        Args:
            content: The text to evaluate
            criterion: Plain English criterion (e.g., "Is this professional?")

        Returns:
            AssertionResult that is truthy if criterion is met
        """
        messages = [
            {
                "role": "system",
                "content": (
                    "You are an assertion evaluator. "
                    "Evaluate if the given content meets the specified criterion.\n\n"
                    "Respond in JSON format:\n"
                    '{"result": "PASS" or "FAIL", "reasoning": "brief explanation"}'
                ),
            },
            {
                "role": "user",
                "content": f"Criterion: {criterion}\n\nContent:\n{content}",
            },
        ]

        response = self._call_llm(messages)

        # Parse JSON response
        try:
            # Handle potential markdown code blocks
            text = response.strip()
            if text.startswith("```"):
                text = text.split("```")[1]
                if text.startswith("json"):
                    text = text[4:]
            data = json.loads(text.strip())
            passed = data.get("result", "").upper() == "PASS"
            reasoning = data.get("reasoning", "")
        except (json.JSONDecodeError, AttributeError):
            # Fallback to line-based parsing
            lines = response.strip().split("\n", 1)
            first_line = lines[0].strip().upper()
            passed = first_line in ("PASS", "YES", "TRUE", "PASSED")
            reasoning = lines[1].strip() if len(lines) > 1 else response

        return AssertionResult(
            passed=passed,
            criterion=criterion,
            reasoning=reasoning,
            content_preview=self._truncate(content),
        )

pytest_llm_assert/plugin.py
@@ -0,0 +1,43 @@
"""pytest plugin providing LLM-powered assertion fixtures."""

from __future__ import annotations

import pytest

from pytest_llm_assert.core import LLMAssert


def pytest_addoption(parser: pytest.Parser) -> None:
    """Add pytest CLI options for LLM assertions."""
    group = parser.getgroup("llm-assert", "LLM-powered assertions")
    group.addoption(
        "--llm-model",
        default="openai/gpt-5-mini",
        help="Default LiteLLM model for assertions (default: openai/gpt-5-mini)",
    )
    group.addoption(
        "--llm-api-key",
        default=None,
        help="API key for LLM provider (supports ${ENV_VAR} expansion)",
    )
    group.addoption(
        "--llm-api-base",
        default=None,
        help="Custom API base URL for LLM provider",
    )


@pytest.fixture
def llm_assert(request: pytest.FixtureRequest) -> LLMAssert:
    """Fixture providing an LLMAssert instance configured via CLI options.

    Example:
        def test_greeting(llm_assert):
            response = "Hello! How can I help you today?"
            assert llm_assert(response, "Is this a friendly greeting?")
    """
    return LLMAssert(
        model=request.config.getoption("--llm-model"),
        api_key=request.config.getoption("--llm-api-key"),
        api_base=request.config.getoption("--llm-api-base"),
    )

pytest_llm_assert-0.1.0.dist-info/METADATA
@@ -0,0 +1,246 @@
Metadata-Version: 2.4
Name: pytest-llm-assert
Version: 0.1.0
Summary: Simple LLM-powered assertions for any pytest test
Project-URL: Homepage, https://github.com/sbroenne/pytest-llm-assert
Project-URL: Documentation, https://github.com/sbroenne/pytest-llm-assert#readme
Project-URL: Repository, https://github.com/sbroenne/pytest-llm-assert
Author: Stefan Broenner
License-Expression: MIT
License-File: LICENSE
Keywords: ai,assertions,llm,pytest,testing
Classifier: Development Status :: 3 - Alpha
Classifier: Framework :: Pytest
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Requires-Python: >=3.11
Requires-Dist: azure-identity>=1.15
Requires-Dist: litellm>=1.55
Requires-Dist: pytest>=8.0
Provides-Extra: dev
Requires-Dist: pyright>=1.1; extra == 'dev'
Requires-Dist: pytest>=8.0; extra == 'dev'
Requires-Dist: python-dotenv>=1.0; extra == 'dev'
Requires-Dist: ruff>=0.8; extra == 'dev'
Description-Content-Type: text/markdown

# pytest-llm-assert

**Natural language assertions for pytest.**

A pytest plugin that lets you write semantic assertions using LLMs. Stop writing brittle string checks — let an LLM understand what you actually mean.

## The Problem

```python
# ❌ These all fail even though they mean "success":
assert "success" in response  # Fails on "Succeeded", "successful", "It worked!"
assert response == "Operation completed successfully"  # Exact match? Really?
assert re.match(r"success|succeeded|worked", response, re.I)  # Regex hell
```

```python
# You're testing a text-to-SQL agent. How do you validate the output?

# ❌ Exact match? There are many valid ways to write the same query:
assert sql == "SELECT name FROM users WHERE age > 21"

# ❌ Regex? Good luck covering all valid SQL syntax:
assert re.match(r"SELECT\s+name\s+FROM\s+users", sql, re.I)

# ❌ Parse it? Now you need a SQL parser as a test dependency:
assert sqlparse.parse(sql)[0].get_type() == "SELECT"
```

## The Solution

```python
# ✅ Just say what you mean:
assert llm(response, "Does this indicate the operation succeeded?")
assert llm(sql, "Is this a valid SELECT query that returns user names for users over 21?")
```

## Why This Works

The LLM evaluates your criterion against the content and returns a judgment. It understands:

- **Synonyms**: "success", "succeeded", "worked", "completed" all mean the same thing
- **Semantics**: Two SQL queries can be equivalent even with different syntax
- **Context**: "The operation failed successfully" is actually a failure
- **Intent**: Generated code can be correct even if it's not identical to a reference
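
A minimal sketch of the synonym point (assumes `OPENAI_API_KEY` is set and the default judge model is reachable): one semantic criterion accepts phrasings that a substring check would treat very differently.

```python
from pytest_llm_assert import LLMAssert

llm = LLMAssert(model="openai/gpt-5-mini")

# Each phrasing satisfies the same criterion, while `assert "success" in response`
# would reject all but the last one.
for response in ("Succeeded", "It worked!", "The operation completed successfully"):
    assert llm(response, "Does this indicate the operation succeeded?")
```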

## Installation

```bash
pip install pytest-llm-assert
```

## Setup

This library uses [LiteLLM](https://docs.litellm.ai/) under the hood, giving you access to **100+ LLM providers** with a unified API.

```bash
# OpenAI
export OPENAI_API_KEY=sk-...

# Azure OpenAI with Entra ID (no API keys)
export AZURE_API_BASE=https://your-resource.openai.azure.com
export AZURE_API_VERSION=2024-02-15-preview
# Uses DefaultAzureCredential: az login, managed identity, etc.

# Ollama (local)
# Just run: ollama serve
```

See [LiteLLM docs](https://docs.litellm.ai/docs/providers) for all providers including Vertex AI, Bedrock, Anthropic, and more.

## Quick Start

```python
from pytest_llm_assert import LLMAssert

llm = LLMAssert(model="openai/gpt-5-mini")  # Uses OPENAI_API_KEY from env

# Semantic assertions - returns True/False
assert llm("Operation completed successfully", "Does this indicate success?")
assert llm("Error: connection refused", "Does this indicate a failure?")
assert not llm("All tests passed", "Does this indicate a failure?")
```

## Real Examples

First, create a fixture in `conftest.py`:

```python
# conftest.py
import pytest
from pytest_llm_assert import LLMAssert

@pytest.fixture
def llm():
    return LLMAssert(model="openai/gpt-5-mini")
```

Then use it in your tests:

### Testing Error Messages

```python
def test_validation_error_is_helpful(llm):
    """Error messages should explain the problem clearly."""
    error_msg = "ValidationError: 'port' must be an integer, got 'not-a-number'"

    assert llm(error_msg, "Does this explain that port must be a number?")
    assert llm(error_msg, "Does this indicate which field failed validation?")
```

### Testing Generated SQL

```python
def test_query_builder_generates_valid_sql(llm):
    """Query builder should produce semantically correct SQL."""
    query = "SELECT name FROM users WHERE age > 21 ORDER BY name"

    assert llm(query, "Is this a valid SELECT query that returns names of users over 21?")
```

### Testing LLM Output

```python
def test_summary_is_comprehensive(llm):
    """Generated summaries should capture key points."""
    summary = "The contract establishes a 2-year service agreement between..."

    assert llm(summary, "Does this summarize a legal contract?")
    assert llm(summary, "Does this mention the contract duration?")
```

## Comparing Judge Models

Not sure which LLM to use as your assertion judge? Run the same tests against multiple models to find the best one for your use case:

```python
import pytest
from pytest_llm_assert import LLMAssert

MODELS = ["openai/gpt-5-mini", "anthropic/claude-sonnet-4-20250514", "ollama/llama3.1:8b"]

@pytest.fixture(params=MODELS)
def llm(request):
    return LLMAssert(model=request.param)

def test_validates_sql_equivalence(llm):
    """Test which models can judge SQL semantic equivalence."""
    sql = "SELECT u.name FROM users AS u WHERE u.age >= 22"
    assert llm(sql, "Is this equivalent to selecting names of users over 21?")
```

Output shows which judge models correctly evaluate your criterion:

```
test_validates_sql_equivalence[openai/gpt-5-mini] PASSED
test_validates_sql_equivalence[anthropic/claude-sonnet-4-20250514] PASSED
test_validates_sql_equivalence[ollama/llama3.1:8b] FAILED
```

> **Note:** This tests which LLM makes a good *judge* for your assertions. To test AI agents themselves (e.g., "does my coding agent produce working code?"), see [pytest-aitest](https://github.com/sbroenne/pytest-aitest).

## Configuration

### Programmatic

```python
from pytest_llm_assert import LLMAssert

llm = LLMAssert(
    model="openai/gpt-5-mini",
    api_key="sk-...",  # Or use env var
    api_base="https://...",  # Custom endpoint
)
```

### CLI Options

```bash
pytest --llm-model=openai/gpt-5-mini
pytest --llm-api-key='${OPENAI_API_KEY}'  # Env var expansion
pytest --llm-api-base=http://localhost:8080
```
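
These options configure the `llm_assert` fixture that the plugin registers (see `pytest_llm_assert/plugin.py` above), so a test can use it without any conftest setup; a minimal example:

```python
def test_greeting(llm_assert):
    # Configured from --llm-model / --llm-api-key / --llm-api-base
    assert llm_assert("Hello! How can I help you today?", "Is this a friendly greeting?")
```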

### Environment Variables

```bash
export OPENAI_API_KEY=sk-...
export LLM_MODEL=openai/gpt-5-mini
```

## API Reference

### `LLMAssert(model, api_key=None, api_base=None, **kwargs)`

Create an LLM assertion helper.

- `model`: LiteLLM model string (e.g., `"openai/gpt-5-mini"`, `"azure/gpt-4o"`)
- `api_key`: Optional API key (or use environment variables)
- `api_base`: Optional custom endpoint
- `**kwargs`: Additional parameters passed to LiteLLM

### `llm(content, criterion) -> AssertionResult`

Evaluate if content meets the criterion.

- Returns `AssertionResult` which is truthy if criterion is met
- Access `.reasoning` for the LLM's explanation
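
A short sketch of working with the result object (assumes `OPENAI_API_KEY` is set; the reasoning text varies by judge model):

```python
from pytest_llm_assert import LLMAssert

llm = LLMAssert(model="openai/gpt-5-mini")
result = llm("Error: connection refused", "Does this indicate success?")

assert not result        # AssertionResult is falsy when the criterion is not met
print(result.reasoning)  # the judge's brief explanation
print(repr(result))      # PASS/FAIL repr with content preview, shown in pytest failure output
```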

## See Also

- **[Examples](examples/)** — Example pytest tests showing basic usage, model comparison, and fixture patterns
- **[pytest-aitest](https://github.com/sbroenne/pytest-aitest)** — Full framework for testing MCP servers, CLIs, and AI agents. Uses pytest-llm-assert for the judge.

## License

MIT

pytest_llm_assert-0.1.0.dist-info/RECORD
@@ -0,0 +1,8 @@
pytest_llm_assert/__init__.py,sha256=gp_z4g6Yf9SnjwEyZc6kPSqEWw2Nyb5er84HRuUaXCA,169
pytest_llm_assert/core.py,sha256=sDQvcus5EqHQ-_iQyLH2XB9nL4UhLpiWGTXnGhO7YyE,6351
pytest_llm_assert/plugin.py,sha256=g3sotHAeUXMuOsFQdaoIbn0CY24i-1CPv0EglrC5qtE,1327
pytest_llm_assert-0.1.0.dist-info/METADATA,sha256=cGK3fmb5T0ZKOBtM0PkmnRkAaGnLZ1aEDhBD5U8-1UQ,7713
pytest_llm_assert-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
pytest_llm_assert-0.1.0.dist-info/entry_points.txt,sha256=YEYg83TT6znVYdvFvZHJEOJ8XsZbcrqV9pY8uM-ThQE,49
pytest_llm_assert-0.1.0.dist-info/licenses/LICENSE,sha256=wHrdHpzRm4rdlyMdj-sQw7aou6kHPujW0VmRBEhInJ8,1072
pytest_llm_assert-0.1.0.dist-info/RECORD,,

pytest_llm_assert-0.1.0.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 Stefan Broenner

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.