pytest-llm-assert 0.1.0 (pytest_llm_assert-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytest_llm_assert/__init__.py
@@ -0,0 +1,6 @@
+ """pytest-llm-assert: Simple LLM-powered assertions for any pytest test."""
+
+ from pytest_llm_assert.core import LLMAssert
+
+ __all__ = ["LLMAssert"]
+ __version__ = "0.1.0"
pytest_llm_assert/core.py
@@ -0,0 +1,186 @@
+ """Core LLM assertion implementation."""
+
+ from __future__ import annotations
+
+ import json
+ import os
+ import re
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Callable
+
+ import litellm
+
+ if TYPE_CHECKING:
+     from typing import Any
+
+
+ @dataclass(slots=True)
+ class AssertionResult:
+     """Result of an LLM assertion with rich repr for pytest."""
+
+     passed: bool
+     criterion: str
+     reasoning: str
+     content_preview: str
+
+     def __bool__(self) -> bool:
+         return self.passed
+
+     def __repr__(self) -> str:
+         status = "PASS" if self.passed else "FAIL"
+         return (
+             f"LLMAssert({status}: {self.criterion!r})\n"
+             f" Content: {self.content_preview!r}\n"
+             f" Reasoning: {self.reasoning}"
+         )
+
+
+ class LLMAssert:
+     """LLM-powered assertions for semantic evaluation.
+
+     Example:
+         >>> llm = LLMAssert(model="openai/gpt-5-mini")
+         >>> assert llm("Hello world", "Is this a greeting?")
+
+     For Azure OpenAI with Entra ID, just use `az login` - no API key needed:
+         >>> llm = LLMAssert(model="azure/gpt-4o", api_base="https://your-resource.openai.azure.com")
+     """
+
+     def __init__(
+         self,
+         model: str = "openai/gpt-5-mini",
+         api_key: str | None = None,
+         api_base: str | None = None,
+         **kwargs: Any,
+     ) -> None:
+         """Initialize LLM assertion helper.
+
+         Args:
+             model: LiteLLM model string (e.g., "openai/gpt-5-mini", "azure/gpt-4o")
+             api_key: API key (supports ${ENV_VAR} expansion). For Azure, leave empty to use Entra ID.
+             api_base: Custom API base URL (required for Azure)
+             **kwargs: Additional parameters passed to LiteLLM
+         """
+         self.model = model
+         self.api_key = self._expand_env(api_key) if api_key else None
+         self.api_base = api_base
+         self.kwargs = kwargs
+         self._azure_ad_token_provider: Callable[[], str] | None = None
+
+         # Auto-configure Azure Entra ID when no API key is provided
+         if self._is_azure_model() and not self._has_azure_api_key():
+             self._azure_ad_token_provider = self._get_azure_ad_token_provider()
+
+     def _is_azure_model(self) -> bool:
+         """Check if the model is an Azure OpenAI model."""
+         return self.model.startswith("azure/")
+
+     def _has_azure_api_key(self) -> bool:
+         """Check if an Azure API key is available."""
+         return bool(self.api_key or os.environ.get("AZURE_API_KEY"))
+
+     @staticmethod
+     def _get_azure_ad_token_provider() -> Callable[[], str] | None:
+         """Get Azure AD token provider for Entra ID authentication.
+
+         Uses LiteLLM's built-in helper which leverages DefaultAzureCredential:
+         - Azure CLI credentials (az login)
+         - Managed Identity
+         - Environment variables (AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID)
+         - Visual Studio Code credentials
+         """
+         try:
+             from litellm.secret_managers.get_azure_ad_token_provider import (
+                 get_azure_ad_token_provider,
+             )
+
+             return get_azure_ad_token_provider()
+         except ImportError:
+             # azure-identity not installed
+             return None
+         except Exception:
+             # Credential not available
+             return None
+
+     @staticmethod
+     def _expand_env(value: str) -> str:
+         """Expand ${VAR} patterns in string."""
+         pattern = r"\$\{([^}]+)\}"
+         return re.sub(pattern, lambda m: os.environ.get(m.group(1), m.group(0)), value)
+
+     @staticmethod
+     def _truncate(text: str, max_len: int = 100) -> str:
+         """Truncate text for display."""
+         if len(text) <= max_len:
+             return text
+         return text[: max_len - 3] + "..."
+
+     def _call_llm(self, messages: list[dict[str, str]]) -> str:
+         """Call the LLM and return response content."""
+         kwargs = {**self.kwargs}
+
+         # Use Azure AD token provider if configured (Entra ID auth)
+         if self._azure_ad_token_provider is not None:
+             kwargs["azure_ad_token_provider"] = self._azure_ad_token_provider
+
+         response = litellm.completion(
+             model=self.model,
+             messages=messages,
+             api_key=self.api_key,
+             api_base=self.api_base,
+             **kwargs,
+         )
+         return response.choices[0].message.content or ""  # type: ignore[union-attr]
+
+     def __call__(self, content: str, criterion: str) -> AssertionResult:
+         """Evaluate if content meets the given criterion.
+
+         Args:
+             content: The text to evaluate
+             criterion: Plain English criterion (e.g., "Is this professional?")
+
+         Returns:
+             AssertionResult that is truthy if criterion is met
+         """
+         messages = [
+             {
+                 "role": "system",
+                 "content": (
+                     "You are an assertion evaluator. "
+                     "Evaluate if the given content meets the specified criterion.\n\n"
+                     "Respond in JSON format:\n"
+                     '{"result": "PASS" or "FAIL", "reasoning": "brief explanation"}'
+                 ),
+             },
+             {
+                 "role": "user",
+                 "content": f"Criterion: {criterion}\n\nContent:\n{content}",
+             },
+         ]
+
+         response = self._call_llm(messages)
+
+         # Parse JSON response
+         try:
+             # Handle potential markdown code blocks
+             text = response.strip()
+             if text.startswith("```"):
+                 text = text.split("```")[1]
+                 if text.startswith("json"):
+                     text = text[4:]
+             data = json.loads(text.strip())
+             passed = data.get("result", "").upper() == "PASS"
+             reasoning = data.get("reasoning", "")
+         except (json.JSONDecodeError, AttributeError):
+             # Fallback to line-based parsing
+             lines = response.strip().split("\n", 1)
+             first_line = lines[0].strip().upper()
+             passed = first_line in ("PASS", "YES", "TRUE", "PASSED")
+             reasoning = lines[1].strip() if len(lines) > 1 else response
+
+         return AssertionResult(
+             passed=passed,
+             criterion=criterion,
+             reasoning=reasoning,
+             content_preview=self._truncate(content),
+         )
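
The response parsing in `__call__` tolerates models that wrap their JSON verdict in a markdown fence. A minimal sketch of that fence-stripping path, traced by hand with a hypothetical model reply (only the stdlib `json` module is involved):

```python
import json

# Hypothetical reply: the JSON verdict wrapped in a markdown code fence.
response = '```json\n{"result": "PASS", "reasoning": "Reads as a greeting."}\n```'

text = response.strip()
if text.startswith("```"):
    text = text.split("```")[1]  # keep the fenced body: 'json\n{...}\n'
    if text.startswith("json"):
        text = text[4:]  # drop the language tag
data = json.loads(text.strip())

assert data["result"].upper() == "PASS"
assert data["reasoning"] == "Reads as a greeting."
```

Replies that are bare JSON skip both `if` branches, and anything unparseable falls through to the line-based PASS/FAIL fallback shown in `__call__`.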
pytest_llm_assert/plugin.py
@@ -0,0 +1,43 @@
+ """pytest plugin providing LLM-powered assertion fixtures."""
+
+ from __future__ import annotations
+
+ import pytest
+
+ from pytest_llm_assert.core import LLMAssert
+
+
+ def pytest_addoption(parser: pytest.Parser) -> None:
+     """Add pytest CLI options for LLM assertions."""
+     group = parser.getgroup("llm-assert", "LLM-powered assertions")
+     group.addoption(
+         "--llm-model",
+         default="openai/gpt-5-mini",
+         help="Default LiteLLM model for assertions (default: openai/gpt-5-mini)",
+     )
+     group.addoption(
+         "--llm-api-key",
+         default=None,
+         help="API key for LLM provider (supports ${ENV_VAR} expansion)",
+     )
+     group.addoption(
+         "--llm-api-base",
+         default=None,
+         help="Custom API base URL for LLM provider",
+     )
+
+
+ @pytest.fixture
+ def llm_assert(request: pytest.FixtureRequest) -> LLMAssert:
+     """Fixture providing an LLMAssert instance configured via CLI options.
+
+     Example:
+         def test_greeting(llm_assert):
+             response = "Hello! How can I help you today?"
+             assert llm_assert(response, "Is this a friendly greeting?")
+     """
+     return LLMAssert(
+         model=request.config.getoption("--llm-model"),
+         api_key=request.config.getoption("--llm-api-key"),
+         api_base=request.config.getoption("--llm-api-base"),
+     )
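
A minimal sketch of exercising this fixture through the CLI options, assuming the plugin is installed; the module name and strings are illustrative:

```python
# test_errors.py (hypothetical) -- uses the llm_assert fixture registered above.
# Invocation sketch:
#   pytest --llm-model=openai/gpt-5-mini --llm-api-key='${OPENAI_API_KEY}'
# The single quotes keep ${OPENAI_API_KEY} away from the shell, so
# LLMAssert._expand_env performs the expansion instead.


def test_error_names_the_field(llm_assert):
    error = "ValidationError: 'port' must be an integer"
    result = llm_assert(error, "Does this say which field is invalid?")
    assert result, result.reasoning  # on failure, pytest shows the judge's explanation
```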
pytest_llm_assert-0.1.0.dist-info/METADATA
@@ -0,0 +1,246 @@
+ Metadata-Version: 2.4
+ Name: pytest-llm-assert
+ Version: 0.1.0
+ Summary: Simple LLM-powered assertions for any pytest test
+ Project-URL: Homepage, https://github.com/sbroenne/pytest-llm-assert
+ Project-URL: Documentation, https://github.com/sbroenne/pytest-llm-assert#readme
+ Project-URL: Repository, https://github.com/sbroenne/pytest-llm-assert
+ Author: Stefan Broenner
+ License-Expression: MIT
+ License-File: LICENSE
+ Keywords: ai,assertions,llm,pytest,testing
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Framework :: Pytest
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.11
+ Requires-Dist: azure-identity>=1.15
+ Requires-Dist: litellm>=1.55
+ Requires-Dist: pytest>=8.0
+ Provides-Extra: dev
+ Requires-Dist: pyright>=1.1; extra == 'dev'
+ Requires-Dist: pytest>=8.0; extra == 'dev'
+ Requires-Dist: python-dotenv>=1.0; extra == 'dev'
+ Requires-Dist: ruff>=0.8; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # pytest-llm-assert
+
+ **Natural language assertions for pytest.**
+
+ A pytest plugin that lets you write semantic assertions using LLMs. Stop writing brittle string checks — let an LLM understand what you actually mean.
+
+ ## The Problem
+
+ ```python
+ # ❌ These all fail on outputs that clearly mean "success":
+ assert "success" in response  # Fails on "Succeeded", "Success!", "It worked!"
+ assert response == "Operation completed successfully"  # Exact match? Really?
+ assert re.match(r"success|succeeded|worked", response, re.I)  # Regex hell
+ ```
+
+ ```python
+ # You're testing a text-to-SQL agent. How do you validate the output?
+
+ # ❌ Exact match? There are many valid ways to write the same query:
+ assert sql == "SELECT name FROM users WHERE age > 21"
+
+ # ❌ Regex? Good luck covering all valid SQL syntax:
+ assert re.match(r"SELECT\s+name\s+FROM\s+users", sql, re.I)
+
+ # ❌ Parse it? Now you need a SQL parser as a test dependency:
+ assert sqlparse.parse(sql)[0].get_type() == "SELECT"
+ ```
+
+ ## The Solution
+
+ ```python
+ # ✅ Just say what you mean:
+ assert llm(response, "Does this indicate the operation succeeded?")
+ assert llm(sql, "Is this a valid SELECT query that returns user names for users over 21?")
+ ```
+
+ ## Why This Works
+
+ The LLM evaluates your criterion against the content and returns a judgment. It understands:
+
+ - **Synonyms**: "success", "succeeded", "worked", "completed" all mean the same thing
+ - **Semantics**: Two SQL queries can be equivalent even with different syntax
+ - **Context**: "The operation failed successfully" is actually a failure
+ - **Intent**: Generated code can be correct even if it's not identical to a reference
+
+
+ ## Installation
+
+ ```bash
+ pip install pytest-llm-assert
+ ```
+
+ ## Setup
+
+ This library uses [LiteLLM](https://docs.litellm.ai/) under the hood, giving you access to **100+ LLM providers** with a unified API.
+
+ ```bash
+ # OpenAI
+ export OPENAI_API_KEY=sk-...
+
+ # Azure OpenAI with Entra ID (no API keys)
+ export AZURE_API_BASE=https://your-resource.openai.azure.com
+ export AZURE_API_VERSION=2024-02-15-preview
+ # Uses DefaultAzureCredential: az login, managed identity, etc.
+
+ # Ollama (local)
+ # Just run: ollama serve
+ ```
+
+ See [LiteLLM docs](https://docs.litellm.ai/docs/providers) for all providers including Vertex AI, Bedrock, Anthropic, and more.
+
+ ## Quick Start
+
+ ```python
+ from pytest_llm_assert import LLMAssert
+
+ llm = LLMAssert(model="openai/gpt-5-mini")  # Uses OPENAI_API_KEY from env
+
+ # Semantic assertions - returns True/False
+ assert llm("Operation completed successfully", "Does this indicate success?")
+ assert llm("Error: connection refused", "Does this indicate a failure?")
+ assert not llm("All tests passed", "Does this indicate a failure?")
+ ```
+
+ ## Real Examples
+
+ First, create a fixture in `conftest.py`:
+
+ ```python
+ # conftest.py
+ import pytest
+ from pytest_llm_assert import LLMAssert
+
+ @pytest.fixture
+ def llm():
+     return LLMAssert(model="openai/gpt-5-mini")
+ ```
+
+ Then use it in your tests:
+
+ ### Testing Error Messages
+
+ ```python
+ def test_validation_error_is_helpful(llm):
+     """Error messages should explain the problem clearly."""
+     error_msg = "ValidationError: 'port' must be an integer, got 'not-a-number'"
+
+     assert llm(error_msg, "Does this explain that port must be a number?")
+     assert llm(error_msg, "Does this indicate which field failed validation?")
+ ```
+
+ ### Testing Generated SQL
+
+ ```python
+ def test_query_builder_generates_valid_sql(llm):
+     """Query builder should produce semantically correct SQL."""
+     query = "SELECT name FROM users WHERE age > 21 ORDER BY name"
+
+     assert llm(query, "Is this a valid SELECT query that returns names of users over 21?")
+ ```
+
+ ### Testing LLM Output
+
+ ```python
+ def test_summary_is_comprehensive(llm):
+     """Generated summaries should capture key points."""
+     summary = "The contract establishes a 2-year service agreement between..."
+
+     assert llm(summary, "Does this summarize a legal contract?")
+     assert llm(summary, "Does this mention the contract duration?")
+ ```
+
+ ## Comparing Judge Models
+
+ Not sure which LLM to use as your assertion judge? Run the same tests against multiple models to find the best one for your use case:
+
+ ```python
+ import pytest
+ from pytest_llm_assert import LLMAssert
+
+ MODELS = ["openai/gpt-5-mini", "anthropic/claude-sonnet-4-20250514", "ollama/llama3.1:8b"]
+
+ @pytest.fixture(params=MODELS)
+ def llm(request):
+     return LLMAssert(model=request.param)
+
+ def test_validates_sql_equivalence(llm):
+     """Test which models can judge SQL semantic equivalence."""
+     sql = "SELECT u.name FROM users AS u WHERE u.age >= 22"
+     assert llm(sql, "Is this equivalent to selecting names of users over 21?")
+ ```
+
+ Output shows which judge models correctly evaluate your criterion:
+ ```
+ test_validates_sql_equivalence[openai/gpt-5-mini] PASSED
+ test_validates_sql_equivalence[anthropic/claude-sonnet-4-20250514] PASSED
+ test_validates_sql_equivalence[ollama/llama3.1:8b] FAILED
+ ```
+
+ > **Note:** This tests which LLM makes a good *judge* for your assertions. To test AI agents themselves (e.g., "does my coding agent produce working code?"), see [pytest-aitest](https://github.com/sbroenne/pytest-aitest).
+
+ ## Configuration
+
+ ### Programmatic
+
+ ```python
+ from pytest_llm_assert import LLMAssert
+
+ llm = LLMAssert(
+     model="openai/gpt-5-mini",
+     api_key="sk-...",  # Or use env var
+     api_base="https://...",  # Custom endpoint
+ )
+ ```
+
+ ### CLI Options
+
+ ```bash
+ pytest --llm-model=openai/gpt-5-mini
+ pytest --llm-api-key='${OPENAI_API_KEY}'  # Env var expansion
+ pytest --llm-api-base=http://localhost:8080
+ ```
+
+ ### Environment Variables
+
+ ```bash
+ export OPENAI_API_KEY=sk-...  # standard LiteLLM provider variables
+ export AZURE_API_KEY=...      # optional for Azure; omit to use Entra ID
+ ```
+
+ ## API Reference
+
+ ### `LLMAssert(model, api_key=None, api_base=None, **kwargs)`
+
+ Create an LLM assertion helper.
+
+ - `model`: LiteLLM model string (e.g., `"openai/gpt-5-mini"`, `"azure/gpt-4o"`)
+ - `api_key`: Optional API key (or use environment variables)
+ - `api_base`: Optional custom endpoint
+ - `**kwargs`: Additional parameters passed to LiteLLM
+
+ ### `llm(content, criterion) -> AssertionResult`
+
+ Evaluate whether the content meets the criterion.
+
+ - Returns an `AssertionResult`, which is truthy if the criterion is met
+ - Access `.reasoning` for the LLM's explanation
+
+ ## See Also
+
+ - **[Examples](examples/)** — Example pytest tests showing basic usage, model comparison, and fixture patterns
+ - **[pytest-aitest](https://github.com/sbroenne/pytest-aitest)** — Full framework for testing MCP servers, CLIs, and AI agents. Uses pytest-llm-assert for the judge.
+
+ ## License
+
+ MIT
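
The `.reasoning` attribute documented in the API reference lives on `AssertionResult` (see core.py earlier in this diff). A short sketch of inspecting a result directly, assuming a provider key is configured; the strings are illustrative:

```python
from pytest_llm_assert import LLMAssert

llm = LLMAssert(model="openai/gpt-5-mini")

result = llm("Deploy finished with no errors", "Does this indicate success?")
print(bool(result))      # truthiness comes from AssertionResult.__bool__
print(result.reasoning)  # the judge's brief explanation
print(repr(result))      # the multi-line PASS/FAIL repr pytest shows on failure
assert result
```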
pytest_llm_assert-0.1.0.dist-info/RECORD
@@ -0,0 +1,8 @@
+ pytest_llm_assert/__init__.py,sha256=gp_z4g6Yf9SnjwEyZc6kPSqEWw2Nyb5er84HRuUaXCA,169
+ pytest_llm_assert/core.py,sha256=sDQvcus5EqHQ-_iQyLH2XB9nL4UhLpiWGTXnGhO7YyE,6351
+ pytest_llm_assert/plugin.py,sha256=g3sotHAeUXMuOsFQdaoIbn0CY24i-1CPv0EglrC5qtE,1327
+ pytest_llm_assert-0.1.0.dist-info/METADATA,sha256=cGK3fmb5T0ZKOBtM0PkmnRkAaGnLZ1aEDhBD5U8-1UQ,7713
+ pytest_llm_assert-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ pytest_llm_assert-0.1.0.dist-info/entry_points.txt,sha256=YEYg83TT6znVYdvFvZHJEOJ8XsZbcrqV9pY8uM-ThQE,49
+ pytest_llm_assert-0.1.0.dist-info/licenses/LICENSE,sha256=wHrdHpzRm4rdlyMdj-sQw7aou6kHPujW0VmRBEhInJ8,1072
+ pytest_llm_assert-0.1.0.dist-info/RECORD,,
pytest_llm_assert-0.1.0.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
pytest_llm_assert-0.1.0.dist-info/entry_points.txt
@@ -0,0 +1,2 @@
+ [pytest11]
+ llm_assert = pytest_llm_assert.plugin
pytest_llm_assert-0.1.0.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 Stefan Broenner
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.