prela-0.1.0-py3-none-any.whl
This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/evals/assertions/semantic.py
@@ -0,0 +1,223 @@
"""
Semantic assertions using embedding-based similarity.

Requires sentence-transformers library:
    pip install sentence-transformers
"""

from __future__ import annotations

import hashlib
from typing import Any

# Check tier before allowing semantic assertions
from prela.license import check_tier

if not check_tier("Semantic assertions", "lunch-money", silent=False):
    raise ImportError(
        "Semantic assertions require 'lunch-money' subscription or higher. "
        "Upgrade at https://prela.dev/pricing"
    )

try:
    from sentence_transformers import SentenceTransformer
    import numpy as np

    SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
    SENTENCE_TRANSFORMERS_AVAILABLE = False
    SentenceTransformer = None
    np = None

from prela.core.span import Span
from prela.evals.assertions.base import AssertionResult, BaseAssertion


class SemanticSimilarityAssertion(BaseAssertion):
    """Assert that output is semantically similar to expected text.

    Uses sentence embeddings to compare semantic meaning rather than exact
    text matching. Useful for evaluating LLM outputs where phrasing varies
    but meaning should be consistent.

    Example:
        >>> assertion = SemanticSimilarityAssertion(
        ...     expected_text="The weather is nice today",
        ...     threshold=0.8
        ... )
        >>> result = assertion.evaluate(
        ...     output="Today has beautiful weather",
        ...     expected=None,
        ...     trace=None
        ... )
        >>> assert result.passed  # High similarity despite different wording

    Requires:
        pip install sentence-transformers
    """

    # Class-level model cache (shared across instances)
    _model_cache: dict[str, Any] = {}

    # Embedding cache (to avoid recomputing for same text)
    _embedding_cache: dict[str, Any] = {}

    def __init__(
        self,
        expected_text: str,
        threshold: float = 0.8,
        model_name: str = "all-MiniLM-L6-v2",
    ):
        """Initialize semantic similarity assertion.

        Args:
            expected_text: Text to compare against
            threshold: Minimum cosine similarity score (0-1) to pass
            model_name: Sentence transformer model to use
                (default: all-MiniLM-L6-v2, fast and accurate)

        Raises:
            ImportError: If sentence-transformers is not installed
            ValueError: If threshold is not between 0 and 1
        """
        if not SENTENCE_TRANSFORMERS_AVAILABLE:
            raise ImportError(
                "sentence-transformers required for semantic similarity. "
                "Install with: pip install sentence-transformers"
            )

        if not 0 <= threshold <= 1:
            raise ValueError(f"threshold must be between 0 and 1, got {threshold}")

        self.expected_text = expected_text
        self.threshold = threshold
        self.model_name = model_name

        # Load model (cached at class level)
        if model_name not in self._model_cache:
            self._model_cache[model_name] = SentenceTransformer(model_name)

        self.model = self._model_cache[model_name]

    def _get_embedding(self, text: str) -> Any:
        """Get embedding for text, using cache if available."""
        # Create cache key from text hash
        text_hash = hashlib.md5(text.encode()).hexdigest()
        cache_key = f"{self.model_name}:{text_hash}"

        if cache_key not in self._embedding_cache:
            self._embedding_cache[cache_key] = self.model.encode(
                text,
                convert_to_numpy=True,
                normalize_embeddings=True,
            )

        return self._embedding_cache[cache_key]

    def _cosine_similarity(self, embedding1: Any, embedding2: Any) -> float:
        """Compute cosine similarity between two embeddings."""
        # Embeddings are already normalized, so dot product = cosine similarity
        return float(np.dot(embedding1, embedding2))

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if output is semantically similar to expected text."""
        output_str = str(output)

        # Get embeddings
        try:
            output_embedding = self._get_embedding(output_str)
            expected_embedding = self._get_embedding(self.expected_text)
        except Exception as e:
            return AssertionResult(
                passed=False,
                assertion_type="semantic_similarity",
                message=f"Failed to compute embeddings: {e}",
                expected=self.expected_text,
                actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
                details={"error": str(e)},
            )

        # Compute similarity
        similarity = self._cosine_similarity(output_embedding, expected_embedding)
        passed = similarity >= self.threshold

        # Determine score interpretation
        if similarity >= 0.9:
            interpretation = "very high"
        elif similarity >= 0.8:
            interpretation = "high"
        elif similarity >= 0.7:
            interpretation = "moderate"
        elif similarity >= 0.6:
            interpretation = "low"
        else:
            interpretation = "very low"

        if passed:
            message = (
                f"Output is semantically similar to expected "
                f"(similarity: {similarity:.3f}, {interpretation})"
            )
        else:
            message = (
                f"Output is not semantically similar enough "
                f"(similarity: {similarity:.3f} < threshold: {self.threshold}, {interpretation})"
            )

        return AssertionResult(
            passed=passed,
            assertion_type="semantic_similarity",
            message=message,
            score=similarity,
            expected=self.expected_text,
            actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
            details={
                "similarity": similarity,
                "threshold": self.threshold,
                "interpretation": interpretation,
                "model": self.model_name,
            },
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> SemanticSimilarityAssertion:
        """Create from configuration.

        Config format:
            {
                "expected_text": "The expected output",
                "threshold": 0.8,  # optional, default: 0.8
                "model_name": "all-MiniLM-L6-v2"  # optional
            }
        """
        if "expected_text" not in config:
            raise ValueError("SemanticSimilarityAssertion requires 'expected_text' in config")

        return cls(
            expected_text=config["expected_text"],
            threshold=config.get("threshold", 0.8),
            model_name=config.get("model_name", "all-MiniLM-L6-v2"),
        )

    def __repr__(self) -> str:
        return (
            f"SemanticSimilarityAssertion("
            f"expected_text={self.expected_text[:30]!r}..., "
            f"threshold={self.threshold}, "
            f"model_name={self.model_name!r})"
        )

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the embedding cache. Useful for testing or memory management."""
        cls._embedding_cache.clear()

    @classmethod
    def get_cache_size(cls) -> int:
        """Get the number of cached embeddings."""
        return len(cls._embedding_cache)
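
A minimal usage sketch of SemanticSimilarityAssertion as packaged above — assuming prela 0.1.0 is installed, the "lunch-money" tier check in prela.license passes, and sentence-transformers is available; the texts, threshold, and printed fields are illustrative. Note that because encode() is called with normalize_embeddings=True, the dot product in _cosine_similarity is exactly the cosine similarity.

# Sketch under the assumptions above; not part of the packaged source.
from prela.evals.assertions.semantic import SemanticSimilarityAssertion

assertion = SemanticSimilarityAssertion(
    expected_text="The weather is nice today",
    threshold=0.8,
)
result = assertion.evaluate(
    output="Today has beautiful weather",
    expected=None,
    trace=None,
)
print(result.passed, result.score, result.message)

# Equivalent construction from a config dict:
assertion = SemanticSimilarityAssertion.from_config(
    {"expected_text": "The weather is nice today", "threshold": 0.8}
)

# Embeddings are cached at class level, shared across instances:
print(SemanticSimilarityAssertion.get_cache_size())
SemanticSimilarityAssertion.clear_cache()
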
prela/evals/assertions/structural.py
@@ -0,0 +1,443 @@
"""
Structural assertions for text and data format validation.
"""

from __future__ import annotations

import json
import re
from typing import Any

from prela.core.span import Span
from prela.evals.assertions.base import AssertionResult, BaseAssertion


class ContainsAssertion(BaseAssertion):
    """Assert that output contains specified text.

    Example:
        >>> assertion = ContainsAssertion(text="error", case_sensitive=False)
        >>> result = assertion.evaluate(output="Error occurred", expected=None, trace=None)
        >>> assert result.passed
    """

    def __init__(self, text: str, case_sensitive: bool = True):
        """Initialize contains assertion.

        Args:
            text: Text that must be present in output
            case_sensitive: Whether to perform case-sensitive matching
        """
        self.text = text
        self.case_sensitive = case_sensitive

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if output contains the specified text."""
        output_str = str(output)
        text = self.text

        if not self.case_sensitive:
            output_str = output_str.lower()
            text = text.lower()

        passed = text in output_str

        if passed:
            message = f"Output contains '{self.text}'"
        else:
            message = f"Output does not contain '{self.text}'"

        return AssertionResult(
            passed=passed,
            assertion_type="contains",
            message=message,
            expected=self.text,
            actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
            details={"case_sensitive": self.case_sensitive},
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> ContainsAssertion:
        """Create from configuration.

        Config format:
            {
                "text": "required text",
                "case_sensitive": true  # optional, default: true
            }
        """
        if "text" not in config:
            raise ValueError("ContainsAssertion requires 'text' in config")

        return cls(
            text=config["text"],
            case_sensitive=config.get("case_sensitive", True),
        )

    def __repr__(self) -> str:
        return f"ContainsAssertion(text={self.text!r}, case_sensitive={self.case_sensitive})"


class NotContainsAssertion(BaseAssertion):
    """Assert that output does NOT contain specified text.

    Example:
        >>> assertion = NotContainsAssertion(text="error")
        >>> result = assertion.evaluate(output="Success!", expected=None, trace=None)
        >>> assert result.passed
    """

    def __init__(self, text: str, case_sensitive: bool = True):
        """Initialize not-contains assertion.

        Args:
            text: Text that must NOT be present in output
            case_sensitive: Whether to perform case-sensitive matching
        """
        self.text = text
        self.case_sensitive = case_sensitive

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if output does not contain the specified text."""
        output_str = str(output)
        text = self.text

        if not self.case_sensitive:
            output_str = output_str.lower()
            text = text.lower()

        passed = text not in output_str

        if passed:
            message = f"Output correctly does not contain '{self.text}'"
        else:
            message = f"Output incorrectly contains '{self.text}'"

        return AssertionResult(
            passed=passed,
            assertion_type="not_contains",
            message=message,
            expected=f"not containing '{self.text}'",
            actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
            details={"case_sensitive": self.case_sensitive},
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> NotContainsAssertion:
        """Create from configuration.

        Config format:
            {
                "text": "forbidden text",
                "case_sensitive": true  # optional, default: true
            }
        """
        if "text" not in config:
            raise ValueError("NotContainsAssertion requires 'text' in config")

        return cls(
            text=config["text"],
            case_sensitive=config.get("case_sensitive", True),
        )

    def __repr__(self) -> str:
        return f"NotContainsAssertion(text={self.text!r}, case_sensitive={self.case_sensitive})"


class RegexAssertion(BaseAssertion):
    """Assert that output matches a regular expression pattern.

    Example:
        >>> assertion = RegexAssertion(pattern=r"\\d{3}-\\d{4}")
        >>> result = assertion.evaluate(output="Call 555-1234", expected=None, trace=None)
        >>> assert result.passed
    """

    def __init__(self, pattern: str, flags: int = 0):
        """Initialize regex assertion.

        Args:
            pattern: Regular expression pattern to match
            flags: Optional regex flags (e.g., re.IGNORECASE)
        """
        self.pattern = pattern
        self.flags = flags
        self._compiled = re.compile(pattern, flags)

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if output matches the regex pattern."""
        output_str = str(output)
        match = self._compiled.search(output_str)
        passed = match is not None

        if passed:
            matched_text = match.group(0) if match else ""
            message = f"Output matches pattern '{self.pattern}' (matched: '{matched_text}')"
            details = {
                "matched_text": matched_text,
                "match_start": match.start(),
                "match_end": match.end(),
            }
        else:
            message = f"Output does not match pattern '{self.pattern}'"
            details = {}

        return AssertionResult(
            passed=passed,
            assertion_type="regex",
            message=message,
            expected=self.pattern,
            actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
            details=details,
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> RegexAssertion:
        """Create from configuration.

        Config format:
            {
                "pattern": "\\d{3}-\\d{4}",
                "flags": 2  # optional, e.g., re.IGNORECASE
            }
        """
        if "pattern" not in config:
            raise ValueError("RegexAssertion requires 'pattern' in config")

        return cls(
            pattern=config["pattern"],
            flags=config.get("flags", 0),
        )

    def __repr__(self) -> str:
        return f"RegexAssertion(pattern={self.pattern!r}, flags={self.flags})"


class LengthAssertion(BaseAssertion):
    """Assert that output length is within specified bounds.

    Example:
        >>> assertion = LengthAssertion(min_length=10, max_length=100)
        >>> result = assertion.evaluate(output="Hello, world!", expected=None, trace=None)
        >>> assert result.passed
    """

    def __init__(self, min_length: int | None = None, max_length: int | None = None):
        """Initialize length assertion.

        Args:
            min_length: Minimum acceptable length (inclusive)
            max_length: Maximum acceptable length (inclusive)

        Raises:
            ValueError: If both min_length and max_length are None
        """
        if min_length is None and max_length is None:
            raise ValueError("At least one of min_length or max_length must be specified")

        if min_length is not None and min_length < 0:
            raise ValueError("min_length must be non-negative")

        if max_length is not None and max_length < 0:
            raise ValueError("max_length must be non-negative")

        if min_length is not None and max_length is not None and min_length > max_length:
            raise ValueError("min_length cannot be greater than max_length")

        self.min_length = min_length
        self.max_length = max_length

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if output length is within bounds."""
        output_str = str(output)
        actual_length = len(output_str)

        passed = True
        reasons = []

        if self.min_length is not None and actual_length < self.min_length:
            passed = False
            reasons.append(f"too short (< {self.min_length})")

        if self.max_length is not None and actual_length > self.max_length:
            passed = False
            reasons.append(f"too long (> {self.max_length})")

        if passed:
            if self.min_length is not None and self.max_length is not None:
                message = f"Output length {actual_length} is within bounds [{self.min_length}, {self.max_length}]"
            elif self.min_length is not None:
                message = f"Output length {actual_length} is >= {self.min_length}"
            else:
                message = f"Output length {actual_length} is <= {self.max_length}"
        else:
            message = f"Output length {actual_length} is {', '.join(reasons)}"

        expected_desc = []
        if self.min_length is not None:
            expected_desc.append(f"min: {self.min_length}")
        if self.max_length is not None:
            expected_desc.append(f"max: {self.max_length}")

        return AssertionResult(
            passed=passed,
            assertion_type="length",
            message=message,
            expected=", ".join(expected_desc),
            actual=actual_length,
            details={
                "min_length": self.min_length,
                "max_length": self.max_length,
            },
        )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> LengthAssertion:
        """Create from configuration.

        Config format:
            {
                "min_length": 10,  # optional
                "max_length": 100  # optional
            }
        """
        return cls(
            min_length=config.get("min_length"),
            max_length=config.get("max_length"),
        )

    def __repr__(self) -> str:
        return f"LengthAssertion(min_length={self.min_length}, max_length={self.max_length})"


class JSONValidAssertion(BaseAssertion):
    """Assert that output is valid JSON, optionally matching a schema.

    Example:
        >>> assertion = JSONValidAssertion()
        >>> result = assertion.evaluate(output='{"key": "value"}', expected=None, trace=None)
        >>> assert result.passed
    """

    def __init__(self, schema: dict[str, Any] | None = None):
        """Initialize JSON validation assertion.

        Args:
            schema: Optional JSON schema to validate against (using jsonschema library)
        """
        self.schema = schema

        # Only import jsonschema if schema validation is requested
        if schema is not None:
            try:
                import jsonschema
                self._validator = jsonschema.Draft7Validator(schema)
            except ImportError:
                raise ImportError(
                    "jsonschema library required for schema validation. "
                    "Install with: pip install jsonschema"
                )
        else:
            self._validator = None

    def evaluate(
        self,
        output: Any,
        expected: Any | None,
        trace: list[Span] | None,
    ) -> AssertionResult:
        """Check if output is valid JSON and optionally matches schema."""
        output_str = str(output)

        # First, check if it's valid JSON
        try:
            parsed = json.loads(output_str)
        except json.JSONDecodeError as e:
            return AssertionResult(
                passed=False,
                assertion_type="json_valid",
                message=f"Output is not valid JSON: {e.msg}",
                expected="valid JSON",
                actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
                details={"error": str(e), "position": e.pos},
            )

        # If no schema, we're done
        if self._validator is None:
            return AssertionResult(
                passed=True,
                assertion_type="json_valid",
                message="Output is valid JSON",
                expected="valid JSON",
                actual=parsed,
                details={"type": type(parsed).__name__},
            )

        # Validate against schema
        errors = list(self._validator.iter_errors(parsed))

        if not errors:
            return AssertionResult(
                passed=True,
                assertion_type="json_valid",
                message="Output is valid JSON and matches schema",
                expected="valid JSON matching schema",
                actual=parsed,
                details={"schema_valid": True},
            )
        else:
            error_messages = [f"{e.json_path}: {e.message}" for e in errors[:3]]
            if len(errors) > 3:
                error_messages.append(f"... and {len(errors) - 3} more errors")

            return AssertionResult(
                passed=False,
                assertion_type="json_valid",
                message=f"Output is valid JSON but does not match schema: {'; '.join(error_messages)}",
                expected="valid JSON matching schema",
                actual=parsed,
                details={
                    "schema_valid": False,
                    "error_count": len(errors),
                    "errors": error_messages,
                },
            )

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> JSONValidAssertion:
        """Create from configuration.

        Config format:
            {
                "schema": {  # optional
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"}
                    }
                }
            }
        """
        return cls(schema=config.get("schema"))

    def __repr__(self) -> str:
        return f"JSONValidAssertion(schema={self.schema is not None})"
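
Likewise, a minimal sketch exercising the structural assertions above, assuming prela 0.1.0 is installed (schema validation additionally needs the optional jsonschema dependency); the sample output, pattern, bounds, and schema are illustrative. All four checks share the evaluate(output, expected, trace) interface, so they can be run uniformly over the same output.

# Sketch under the assumptions above; not part of the packaged source.
from prela.evals.assertions.structural import (
    ContainsAssertion,
    JSONValidAssertion,
    LengthAssertion,
    RegexAssertion,
)

output = '{"name": "Ada", "phone": "555-1234"}'

checks = [
    ContainsAssertion(text="ada", case_sensitive=False),
    RegexAssertion(pattern=r"\d{3}-\d{4}"),
    LengthAssertion(min_length=10, max_length=200),
    JSONValidAssertion(
        schema={"type": "object", "properties": {"name": {"type": "string"}}}
    ),
]

for check in checks:
    result = check.evaluate(output=output, expected=None, trace=None)
    print(result.assertion_type, result.passed, result.message)
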