pytest-llm-assert 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
pytest_llm_assert/__init__.py CHANGED
@@ -1,6 +1,6 @@
  """pytest-llm-assert: Simple LLM-powered assertions for any pytest test."""
 
- from pytest_llm_assert.core import LLMAssert
+ from pytest_llm_assert.core import AssertionResult, LLMAssert, LLMResponse
 
- __all__ = ["LLMAssert"]
+ __all__ = ["LLMAssert", "AssertionResult", "LLMResponse"]
  __version__ = "0.1.0"
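
The widened export list lets callers import the result types directly for annotations. A minimal sketch, assuming the `OPENAI_API_KEY` setup from the README's Quick Start:

```python
# Sketch of the 0.2.0 public API; the model name and key setup
# follow the README's own examples.
from pytest_llm_assert import AssertionResult, LLMAssert, LLMResponse

llm = LLMAssert(model="openai/gpt-5-mini")

result: AssertionResult = llm("Operation completed successfully", "Does this indicate success?")
assert result  # AssertionResult is truthy when the criterion is met

meta: LLMResponse | None = llm.response  # populated by the call above
```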
pytest_llm_assert/core.py CHANGED
@@ -6,14 +6,35 @@ import json
  import os
  import re
  from dataclasses import dataclass
+ from pathlib import Path
  from typing import TYPE_CHECKING, Callable
 
  import litellm
 
+ # Load default system prompt from file
+ _PROMPTS_DIR = Path(__file__).parent / "prompts"
+ _DEFAULT_SYSTEM_PROMPT = (_PROMPTS_DIR / "system_prompt.md").read_text().strip()
+
  if TYPE_CHECKING:
      from typing import Any
 
 
+ @dataclass(slots=True)
+ class LLMResponse:
+     """Response details from the last LLM call.
+
+     Access via `llm.response` after making an assertion call.
+     """
+
+     model: str | None = None
+     prompt_tokens: int | None = None
+     completion_tokens: int | None = None
+     total_tokens: int | None = None
+     cost: float | None = None
+     response_id: str | None = None
+     created: int | None = None
+
+
  @dataclass(slots=True)
  class AssertionResult:
      """Result of an LLM assertion with rich repr for pytest."""
@@ -57,7 +78,8 @@ class LLMAssert:
 
      Args:
          model: LiteLLM model string (e.g., "openai/gpt-5-mini", "azure/gpt-4o")
-         api_key: API key (supports ${ENV_VAR} expansion). For Azure, leave empty to use Entra ID.
+         api_key: API key (supports ${ENV_VAR} expansion).
+             For Azure, leave empty to use Entra ID.
          api_base: Custom API base URL (required for Azure)
          **kwargs: Additional parameters passed to LiteLLM
      """
@@ -66,6 +88,8 @@ class LLMAssert:
          self.api_base = api_base
          self.kwargs = kwargs
          self._azure_ad_token_provider: Callable[[], str] | None = None
+         self._system_prompt: str = _DEFAULT_SYSTEM_PROMPT
+         self.response: LLMResponse | None = None
 
          # Auto-configure Azure Entra ID when no API key is provided
          if self._is_azure_model() and not self._has_azure_api_key():
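
A sketch of the keyless path this constructor enables, following the README's Azure setup; the endpoint is a placeholder and, per the docstring, `api_base` is required for Azure:

```python
# Assumption: `az login` (or managed identity) has already provided
# credentials, so no api_key is passed and the Entra ID branch fires.
llm = LLMAssert(
    model="azure/gpt-4o",  # the docstring's example Azure model
    api_base="https://your-resource.openai.azure.com",
)
```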
@@ -86,7 +110,7 @@ class LLMAssert:
          Uses LiteLLM's built-in helper which leverages DefaultAzureCredential:
          - Azure CLI credentials (az login)
          - Managed Identity
-         - Environment variables (AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID)
+         - Environment variables (AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, ...)
          - Visual Studio Code credentials
          """
          try:
@@ -102,6 +126,20 @@ class LLMAssert:
              # Credential not available
              return None
 
+     @property
+     def system_prompt(self) -> str:
+         """Get the system prompt used for LLM assertions."""
+         return self._system_prompt
+
+     @system_prompt.setter
+     def system_prompt(self, value: str) -> None:
+         """Set a custom system prompt for LLM assertions.
+
+         The prompt should instruct the LLM to evaluate content against a criterion
+         and respond in JSON format with 'result' (PASS/FAIL) and 'reasoning' keys.
+         """
+         self._system_prompt = value
+
      @staticmethod
      def _expand_env(value: str) -> str:
          """Expand ${VAR} patterns in string."""
@@ -116,7 +154,7 @@ class LLMAssert:
          return text[: max_len - 3] + "..."
 
      def _call_llm(self, messages: list[dict[str, str]]) -> str:
-         """Call the LLM and return response content."""
+         """Call the LLM and return response content. Updates self.response."""
          kwargs = {**self.kwargs}
 
          # Use Azure AD token provider if configured (Entra ID auth)
@@ -130,7 +168,27 @@ class LLMAssert:
              api_base=self.api_base,
              **kwargs,
          )
-         return response.choices[0].message.content or ""  # type: ignore[union-attr]
+         content = response.choices[0].message.content or ""  # type: ignore[union-attr]
+
+         # Store response details on instance
+         self.response = LLMResponse(
+             model=getattr(response, "model", None),
+             response_id=getattr(response, "id", None),
+             created=getattr(response, "created", None),
+         )
+
+         # Extract usage info
+         usage = getattr(response, "usage", None)
+         if usage:
+             self.response.prompt_tokens = getattr(usage, "prompt_tokens", None)
+             self.response.completion_tokens = getattr(usage, "completion_tokens", None)
+             self.response.total_tokens = getattr(usage, "total_tokens", None)
+
+         # Extract cost from hidden params (litellm calculates this)
+         if hasattr(response, "_hidden_params"):
+             self.response.cost = response._hidden_params.get("response_cost")
+
+         return content
 
      def __call__(self, content: str, criterion: str) -> AssertionResult:
          """Evaluate if content meets the given criterion.
@@ -145,12 +203,7 @@ class LLMAssert:
          messages = [
              {
                  "role": "system",
-                 "content": (
-                     "You are an assertion evaluator. "
-                     "Evaluate if the given content meets the specified criterion.\n\n"
-                     "Respond in JSON format:\n"
-                     '{"result": "PASS" or "FAIL", "reasoning": "brief explanation"}'
-                 ),
+                 "content": self._system_prompt,
              },
              {
                  "role": "user",
@@ -158,12 +211,12 @@ class LLMAssert:
              },
          ]
 
-         response = self._call_llm(messages)
+         response_text = self._call_llm(messages)
 
          # Parse JSON response
          try:
              # Handle potential markdown code blocks
-             text = response.strip()
+             text = response_text.strip()
              if text.startswith("```"):
                  text = text.split("```")[1]
                  if text.startswith("json"):
@@ -173,10 +226,10 @@ class LLMAssert:
              reasoning = data.get("reasoning", "")
          except (json.JSONDecodeError, AttributeError):
              # Fallback to line-based parsing
-             lines = response.strip().split("\n", 1)
+             lines = response_text.strip().split("\n", 1)
              first_line = lines[0].strip().upper()
              passed = first_line in ("PASS", "YES", "TRUE", "PASSED")
-             reasoning = lines[1].strip() if len(lines) > 1 else response
+             reasoning = lines[1].strip() if len(lines) > 1 else response_text
 
          return AssertionResult(
              passed=passed,
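
For reference, the two parser branches behave like this standalone sketch. The JSON-branch `passed` extraction is elided between the hunks above, so comparing against "PASS" is an assumption based on the prompt's contract:

```python
import json

def parse_verdict(response_text: str) -> tuple[bool, str]:
    """Sketch of the two-stage parse: JSON first, line-based fallback."""
    try:
        text = response_text.strip()
        if text.startswith("```"):        # strip a markdown code fence
            text = text.split("```")[1]
            if text.startswith("json"):
                text = text[4:]
        data = json.loads(text)
        # Assumed extraction (elided in the diff): PASS means passed.
        return data.get("result") == "PASS", data.get("reasoning", "")
    except (json.JSONDecodeError, AttributeError):
        lines = response_text.strip().split("\n", 1)
        first_line = lines[0].strip().upper()
        passed = first_line in ("PASS", "YES", "TRUE", "PASSED")
        return passed, lines[1].strip() if len(lines) > 1 else response_text

# Fenced JSON reply takes the JSON branch; a bare "PASS" reply falls back.
print(parse_verdict('```json\n{"result": "PASS", "reasoning": "ok"}\n```'))
print(parse_verdict("PASS\nlooks correct"))
```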
pytest_llm_assert/prompts/system_prompt.md ADDED
@@ -0,0 +1,4 @@
+ You are an assertion evaluator. Evaluate if the given content meets the specified criterion.
+
+ Respond in JSON format:
+ {"result": "PASS" or "FAIL", "reasoning": "brief explanation"}
pytest_llm_assert-0.2.0.dist-info/METADATA ADDED
@@ -0,0 +1,135 @@
+ Metadata-Version: 2.4
+ Name: pytest-llm-assert
+ Version: 0.2.0
+ Summary: Simple LLM-powered assertions for any pytest test
+ Project-URL: Homepage, https://github.com/sbroenne/pytest-llm-assert
+ Project-URL: Documentation, https://github.com/sbroenne/pytest-llm-assert#readme
+ Project-URL: Repository, https://github.com/sbroenne/pytest-llm-assert
+ Author: Stefan Broenner
+ License-Expression: MIT
+ License-File: LICENSE
+ Keywords: ai,assertions,llm,pytest,testing
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Framework :: Pytest
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.11
+ Requires-Dist: azure-identity>=1.25
+ Requires-Dist: litellm>=1.81
+ Requires-Dist: pytest>=9.0
+ Provides-Extra: dev
+ Requires-Dist: pre-commit>=4.5; extra == 'dev'
+ Requires-Dist: pyright>=1.1.408; extra == 'dev'
+ Requires-Dist: pytest-cov>=6.0; extra == 'dev'
+ Requires-Dist: pytest>=9.0; extra == 'dev'
+ Requires-Dist: python-dotenv>=1.2; extra == 'dev'
+ Requires-Dist: ruff>=0.14; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # pytest-llm-assert
+
+ [![PyPI version](https://img.shields.io/pypi/v/pytest-llm-assert)](https://pypi.org/project/pytest-llm-assert/)
+ [![Python versions](https://img.shields.io/pypi/pyversions/pytest-llm-assert)](https://pypi.org/project/pytest-llm-assert/)
+ [![CI](https://github.com/sbroenne/pytest-llm-assert/actions/workflows/ci.yml/badge.svg)](https://github.com/sbroenne/pytest-llm-assert/actions/workflows/ci.yml)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+ **Natural language assertions for pytest.**
+
+ Testing a text-to-SQL agent? Validating LLM-generated code? Checking if error messages are helpful? Now you can:
+
+ ```python
+ def test_sql_agent_output(llm):
+     sql = my_agent.generate("Get names of users over 21")
+
+     assert llm(sql, "Is this a valid SQL query that selects user names filtered by age > 21?")
+ ```
+
+ The LLM evaluates your criterion and returns pass/fail — no regex, no parsing, no exact string matching.
+
+ ## Features
+
+ - **Semantic assertions** — Assert meaning, not exact strings
+ - **100+ LLM providers** — OpenAI, Azure, Anthropic, Ollama, Vertex AI, Bedrock via [LiteLLM](https://docs.litellm.ai/)
+ - **pytest native** — Works as a standard pytest plugin/fixture
+ - **Response introspection** — Access tokens, cost, and reasoning via `llm.response`
+
+ ## Installation
+
+ ```bash
+ pip install pytest-llm-assert
+ ```
+
+ ## Quick Start
+
+ ```python
+ # conftest.py
+ import pytest
+ from pytest_llm_assert import LLMAssert
+
+ @pytest.fixture
+ def llm():
+     return LLMAssert(model="openai/gpt-5-mini")
+ ```
+
+ ```python
+ # test_my_agent.py
+ def test_generated_sql_is_correct(llm):
+     sql = "SELECT name FROM users WHERE age > 21 ORDER BY name"
+     assert llm(sql, "Is this a valid SELECT query that returns names of users over 21?")
+
+ def test_error_message_is_helpful(llm):
+     error = "ValidationError: 'port' must be an integer, got 'abc'"
+     assert llm(error, "Does this explain what went wrong and how to fix it?")
+
+ def test_summary_captures_key_points(llm):
+     summary = generate_summary(document)
+     assert llm(summary, "Does this mention the contract duration and parties involved?")
+ ```
+
+ ## Setup
+
+ Works out of the box with cloud identity — no API keys to manage:
+
+ ```bash
+ # Azure (Entra ID)
+ export AZURE_API_BASE=https://your-resource.openai.azure.com
+ az login
+
+ # Google Cloud (Vertex AI)
+ gcloud auth application-default login
+
+ # AWS (Bedrock)
+ aws configure  # Uses IAM credentials
+ ```
+
+ Supports 100+ providers via [LiteLLM](https://docs.litellm.ai/docs/providers) — including API key auth for OpenAI, Anthropic, Ollama, and more.
+
+ ## Documentation
+
+ - **[Configuration](docs/configuration.md)** — All providers, CLI options, environment variables
+ - **[API Reference](docs/api-reference.md)** — Full API documentation
+ - **[Comparing Judge Models](docs/comparing-models.md)** — Evaluate which LLM works best for your assertions
+ - **[Examples](examples/)** — Working pytest examples
+
+ ## Related
+
+ - **[pytest-aitest](https://github.com/sbroenne/pytest-aitest)** — Full framework for testing MCP servers, CLIs, and AI agents
+ - **[Contributing](CONTRIBUTING.md)** — Development setup and guidelines
+
+ ## Requirements
+
+ - Python 3.11+
+ - pytest 8.0+
+ - An LLM (OpenAI, Azure, Anthropic, etc.) or local [Ollama](https://ollama.ai/)
+
+ ## Security
+
+ - **Sensitive data**: Test content is sent to LLM providers — consider data policies
+
+ ## License
+
+ MIT
pytest_llm_assert-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ pytest_llm_assert/__init__.py,sha256=OcOVlsvqZBBxMzrQssLbaWVkc4qKSvdOMtLVibzDfFQ,233
+ pytest_llm_assert/core.py,sha256=B3lah5pPZ3VMbtzZ3bBtdJ2o8r_mZb7_aw375_hBC2Q,8145
+ pytest_llm_assert/plugin.py,sha256=g3sotHAeUXMuOsFQdaoIbn0CY24i-1CPv0EglrC5qtE,1327
+ pytest_llm_assert/prompts/system_prompt.md,sha256=RhSaYrpOjVcVwuG_af_Q50kHFhqXGOCKzubSYBXFzTA,181
+ pytest_llm_assert-0.2.0.dist-info/METADATA,sha256=SdFu02CtgVAK14bwjP10clZgxFLQF8yEtiIm5Ct6BJE,4723
+ pytest_llm_assert-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ pytest_llm_assert-0.2.0.dist-info/entry_points.txt,sha256=YEYg83TT6znVYdvFvZHJEOJ8XsZbcrqV9pY8uM-ThQE,49
+ pytest_llm_assert-0.2.0.dist-info/licenses/LICENSE,sha256=wHrdHpzRm4rdlyMdj-sQw7aou6kHPujW0VmRBEhInJ8,1072
+ pytest_llm_assert-0.2.0.dist-info/RECORD,,
pytest_llm_assert-0.1.0.dist-info/METADATA DELETED
@@ -1,246 +0,0 @@
- Metadata-Version: 2.4
- Name: pytest-llm-assert
- Version: 0.1.0
- Summary: Simple LLM-powered assertions for any pytest test
- Project-URL: Homepage, https://github.com/sbroenne/pytest-llm-assert
- Project-URL: Documentation, https://github.com/sbroenne/pytest-llm-assert#readme
- Project-URL: Repository, https://github.com/sbroenne/pytest-llm-assert
- Author: Stefan Broenner
- License-Expression: MIT
- License-File: LICENSE
- Keywords: ai,assertions,llm,pytest,testing
- Classifier: Development Status :: 3 - Alpha
- Classifier: Framework :: Pytest
- Classifier: Intended Audience :: Developers
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
- Requires-Python: >=3.11
- Requires-Dist: azure-identity>=1.15
- Requires-Dist: litellm>=1.55
- Requires-Dist: pytest>=8.0
- Provides-Extra: dev
- Requires-Dist: pyright>=1.1; extra == 'dev'
- Requires-Dist: pytest>=8.0; extra == 'dev'
- Requires-Dist: python-dotenv>=1.0; extra == 'dev'
- Requires-Dist: ruff>=0.8; extra == 'dev'
- Description-Content-Type: text/markdown
-
- # pytest-llm-assert
-
- **Natural language assertions for pytest.**
-
- A pytest plugin that lets you write semantic assertions using LLMs. Stop writing brittle string checks — let an LLM understand what you actually mean.
-
- ## The Problem
-
- ```python
- # ❌ These all fail even though they mean "success":
- assert "success" in response  # Fails on "Succeeded", "successful", "It worked!"
- assert response == "Operation completed successfully"  # Exact match? Really?
- assert re.match(r"success|succeeded|worked", response, re.I)  # Regex hell
- ```
-
- ```python
- # You're testing a text-to-SQL agent. How do you validate the output?
-
- # ❌ Exact match? There are many valid ways to write the same query:
- assert sql == "SELECT name FROM users WHERE age > 21"
-
- # ❌ Regex? Good luck covering all valid SQL syntax:
- assert re.match(r"SELECT\s+name\s+FROM\s+users", sql, re.I)
-
- # ❌ Parse it? Now you need a SQL parser as a test dependency:
- assert sqlparse.parse(sql)[0].get_type() == "SELECT"
- ```
-
- ## The Solution
-
- ```python
- # ✅ Just say what you mean:
- assert llm(response, "Does this indicate the operation succeeded?")
- assert llm(sql, "Is this a valid SELECT query that returns user names for users over 21?")
- ```
-
- ## Why This Works
-
- The LLM evaluates your criterion against the content and returns a judgment. It understands:
-
- - **Synonyms**: "success", "succeeded", "worked", "completed" all mean the same thing
- - **Semantics**: Two SQL queries can be equivalent even with different syntax
- - **Context**: "The operation failed successfully" is actually a failure
- - **Intent**: Generated code can be correct even if it's not identical to a reference
-
-
- ## Installation
-
- ```bash
- pip install pytest-llm-assert
- ```
-
- ## Setup
-
- This library uses [LiteLLM](https://docs.litellm.ai/) under the hood, giving you access to **100+ LLM providers** with a unified API.
-
- ```bash
- # OpenAI
- export OPENAI_API_KEY=sk-...
-
- # Azure OpenAI with Entra ID (no API keys)
- export AZURE_API_BASE=https://your-resource.openai.azure.com
- export AZURE_API_VERSION=2024-02-15-preview
- # Uses DefaultAzureCredential: az login, managed identity, etc.
-
- # Ollama (local)
- # Just run: ollama serve
- ```
-
- See [LiteLLM docs](https://docs.litellm.ai/docs/providers) for all providers including Vertex AI, Bedrock, Anthropic, and more.
-
- ## Quick Start
-
- ```python
- from pytest_llm_assert import LLMAssert
-
- llm = LLMAssert(model="openai/gpt-5-mini")  # Uses OPENAI_API_KEY from env
-
- # Semantic assertions - returns True/False
- assert llm("Operation completed successfully", "Does this indicate success?")
- assert llm("Error: connection refused", "Does this indicate a failure?")
- assert not llm("All tests passed", "Does this indicate a failure?")
- ```
-
- ## Real Examples
-
- First, create a fixture in `conftest.py`:
-
- ```python
- # conftest.py
- import pytest
- from pytest_llm_assert import LLMAssert
-
- @pytest.fixture
- def llm():
-     return LLMAssert(model="openai/gpt-5-mini")
- ```
-
- Then use it in your tests:
-
- ### Testing Error Messages
-
- ```python
- def test_validation_error_is_helpful(llm):
-     """Error messages should explain the problem clearly."""
-     error_msg = "ValidationError: 'port' must be an integer, got 'not-a-number'"
-
-     assert llm(error_msg, "Does this explain that port must be a number?")
-     assert llm(error_msg, "Does this indicate which field failed validation?")
- ```
-
- ### Testing Generated SQL
-
- ```python
- def test_query_builder_generates_valid_sql(llm):
-     """Query builder should produce semantically correct SQL."""
-     query = "SELECT name FROM users WHERE age > 21 ORDER BY name"
-
-     assert llm(query, "Is this a valid SELECT query that returns names of users over 21?")
- ```
-
- ### Testing LLM Output
-
- ```python
- def test_summary_is_comprehensive(llm):
-     """Generated summaries should capture key points."""
-     summary = "The contract establishes a 2-year service agreement between..."
-
-     assert llm(summary, "Does this summarize a legal contract?")
-     assert llm(summary, "Does this mention the contract duration?")
- ```
-
- ## Comparing Judge Models
-
- Not sure which LLM to use as your assertion judge? Run the same tests against multiple models to find the best one for your use case:
-
- ```python
- import pytest
- from pytest_llm_assert import LLMAssert
-
- MODELS = ["openai/gpt-5-mini", "anthropic/claude-sonnet-4-20250514", "ollama/llama3.1:8b"]
-
- @pytest.fixture(params=MODELS)
- def llm(request):
-     return LLMAssert(model=request.param)
-
- def test_validates_sql_equivalence(llm):
-     """Test which models can judge SQL semantic equivalence."""
-     sql = "SELECT u.name FROM users AS u WHERE u.age >= 22"
-     assert llm(sql, "Is this equivalent to selecting names of users over 21?")
- ```
-
- Output shows which judge models correctly evaluate your criterion:
- ```
- test_validates_sql_equivalence[openai/gpt-5-mini] PASSED
- test_validates_sql_equivalence[anthropic/claude-sonnet-4-20250514] PASSED
- test_validates_sql_equivalence[ollama/llama3.1:8b] FAILED
- ```
-
- > **Note:** This tests which LLM makes a good *judge* for your assertions. To test AI agents themselves (e.g., "does my coding agent produce working code?"), see [pytest-aitest](https://github.com/sbroenne/pytest-aitest).
-
- ## Configuration
-
- ### Programmatic
-
- ```python
- from pytest_llm_assert import LLMAssert
-
- llm = LLMAssert(
-     model="openai/gpt-5-mini",
-     api_key="sk-...",  # Or use env var
-     api_base="https://...",  # Custom endpoint
- )
- ```
-
- ### CLI Options
-
- ```bash
- pytest --llm-model=openai/gpt-5-mini
- pytest --llm-api-key='${OPENAI_API_KEY}'  # Env var expansion
- pytest --llm-api-base=http://localhost:8080
- ```
-
- ### Environment Variables
-
- ```bash
- export OPENAI_API_KEY=sk-...
- export LLM_MODEL=openai/gpt-5-mini
- ```
-
- ## API Reference
-
- ### `LLMAssert(model, api_key=None, api_base=None, **kwargs)`
-
- Create an LLM assertion helper.
-
- - `model`: LiteLLM model string (e.g., `"openai/gpt-5-mini"`, `"azure/gpt-4o"`)
- - `api_key`: Optional API key (or use environment variables)
- - `api_base`: Optional custom endpoint
- - `**kwargs`: Additional parameters passed to LiteLLM
-
- ### `llm(content, criterion) -> AssertionResult`
-
- Evaluate if content meets the criterion.
-
- - Returns `AssertionResult` which is truthy if criterion is met
- - Access `.reasoning` for the LLM's explanation
-
- ## See Also
-
- - **[Examples](examples/)** — Example pytest tests showing basic usage, model comparison, and fixture patterns
- - **[pytest-aitest](https://github.com/sbroenne/pytest-aitest)** — Full framework for testing MCP servers, CLIs, and AI agents. Uses pytest-llm-assert for the judge.
-
- ## License
-
- MIT
pytest_llm_assert-0.1.0.dist-info/RECORD DELETED
@@ -1,8 +0,0 @@
- pytest_llm_assert/__init__.py,sha256=gp_z4g6Yf9SnjwEyZc6kPSqEWw2Nyb5er84HRuUaXCA,169
- pytest_llm_assert/core.py,sha256=sDQvcus5EqHQ-_iQyLH2XB9nL4UhLpiWGTXnGhO7YyE,6351
- pytest_llm_assert/plugin.py,sha256=g3sotHAeUXMuOsFQdaoIbn0CY24i-1CPv0EglrC5qtE,1327
- pytest_llm_assert-0.1.0.dist-info/METADATA,sha256=cGK3fmb5T0ZKOBtM0PkmnRkAaGnLZ1aEDhBD5U8-1UQ,7713
- pytest_llm_assert-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- pytest_llm_assert-0.1.0.dist-info/entry_points.txt,sha256=YEYg83TT6znVYdvFvZHJEOJ8XsZbcrqV9pY8uM-ThQE,49
- pytest_llm_assert-0.1.0.dist-info/licenses/LICENSE,sha256=wHrdHpzRm4rdlyMdj-sQw7aou6kHPujW0VmRBEhInJ8,1072
- pytest_llm_assert-0.1.0.dist-info/RECORD,,