prela-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/evals/assertions/semantic.py
@@ -0,0 +1,223 @@
+ """
+ Semantic assertions using embedding-based similarity.
+
+ Requires sentence-transformers library:
+     pip install sentence-transformers
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ from typing import Any
+
+ # Check tier before allowing semantic assertions
+ from prela.license import check_tier
+
+ if not check_tier("Semantic assertions", "lunch-money", silent=False):
+     raise ImportError(
+         "Semantic assertions require 'lunch-money' subscription or higher. "
+         "Upgrade at https://prela.dev/pricing"
+     )
+
+ try:
+     from sentence_transformers import SentenceTransformer
+     import numpy as np
+
+     SENTENCE_TRANSFORMERS_AVAILABLE = True
+ except ImportError:
+     SENTENCE_TRANSFORMERS_AVAILABLE = False
+     SentenceTransformer = None
+     np = None
+
+ from prela.core.span import Span
+ from prela.evals.assertions.base import AssertionResult, BaseAssertion
+
+
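Note on the gate above: the tier check runs at import time, so merely importing this module raises ImportError when check_tier fails; a missing sentence-transformers, by contrast, only raises later in SemanticSimilarityAssertion.__init__. A minimal consumer-side guard, assuming only the behavior shown above (the None fallback is an illustrative convention, not part of the package):

    try:
        from prela.evals.assertions.semantic import SemanticSimilarityAssertion
    except ImportError:
        # Raised at import time when the "lunch-money" tier check fails.
        SemanticSimilarityAssertion = None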
+ class SemanticSimilarityAssertion(BaseAssertion):
+     """Assert that output is semantically similar to expected text.
+
+     Uses sentence embeddings to compare semantic meaning rather than exact
+     text matching. Useful for evaluating LLM outputs where phrasing varies
+     but meaning should be consistent.
+
+     Example:
+         >>> assertion = SemanticSimilarityAssertion(
+         ...     expected_text="The weather is nice today",
+         ...     threshold=0.8
+         ... )
+         >>> result = assertion.evaluate(
+         ...     output="Today has beautiful weather",
+         ...     expected=None,
+         ...     trace=None
+         ... )
+         >>> assert result.passed  # High similarity despite different wording
+
+     Requires:
+         pip install sentence-transformers
+     """
+
+     # Class-level model cache (shared across instances)
+     _model_cache: dict[str, Any] = {}
+
+     # Embedding cache (to avoid recomputing for same text)
+     _embedding_cache: dict[str, Any] = {}
+
+     def __init__(
+         self,
+         expected_text: str,
+         threshold: float = 0.8,
+         model_name: str = "all-MiniLM-L6-v2",
+     ):
+         """Initialize semantic similarity assertion.
+
+         Args:
+             expected_text: Text to compare against
+             threshold: Minimum cosine similarity score (0-1) to pass
+             model_name: Sentence transformer model to use
+                 (default: all-MiniLM-L6-v2, fast and accurate)
+
+         Raises:
+             ImportError: If sentence-transformers is not installed
+             ValueError: If threshold is not between 0 and 1
+         """
+         if not SENTENCE_TRANSFORMERS_AVAILABLE:
+             raise ImportError(
+                 "sentence-transformers required for semantic similarity. "
+                 "Install with: pip install sentence-transformers"
+             )
+
+         if not 0 <= threshold <= 1:
+             raise ValueError(f"threshold must be between 0 and 1, got {threshold}")
+
+         self.expected_text = expected_text
+         self.threshold = threshold
+         self.model_name = model_name
+
+         # Load model (cached at class level)
+         if model_name not in self._model_cache:
+             self._model_cache[model_name] = SentenceTransformer(model_name)
+
+         self.model = self._model_cache[model_name]
+
+     def _get_embedding(self, text: str) -> Any:
+         """Get embedding for text, using cache if available."""
+         # Create cache key from text hash
+         text_hash = hashlib.md5(text.encode()).hexdigest()
+         cache_key = f"{self.model_name}:{text_hash}"
+
+         if cache_key not in self._embedding_cache:
+             self._embedding_cache[cache_key] = self.model.encode(
+                 text,
+                 convert_to_numpy=True,
+                 normalize_embeddings=True,
+             )
+
+         return self._embedding_cache[cache_key]
+
+     def _cosine_similarity(self, embedding1: Any, embedding2: Any) -> float:
+         """Compute cosine similarity between two embeddings."""
+         # Embeddings are already normalized, so dot product = cosine similarity
+         return float(np.dot(embedding1, embedding2))
+
+     def evaluate(
+         self,
+         output: Any,
+         expected: Any | None,
+         trace: list[Span] | None,
+     ) -> AssertionResult:
+         """Check if output is semantically similar to expected text."""
+         output_str = str(output)
+
+         # Get embeddings
+         try:
+             output_embedding = self._get_embedding(output_str)
+             expected_embedding = self._get_embedding(self.expected_text)
+         except Exception as e:
+             return AssertionResult(
+                 passed=False,
+                 assertion_type="semantic_similarity",
+                 message=f"Failed to compute embeddings: {e}",
+                 expected=self.expected_text,
+                 actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
+                 details={"error": str(e)},
+             )
+
+         # Compute similarity
+         similarity = self._cosine_similarity(output_embedding, expected_embedding)
+         passed = similarity >= self.threshold
+
+         # Determine score interpretation
+         if similarity >= 0.9:
+             interpretation = "very high"
+         elif similarity >= 0.8:
+             interpretation = "high"
+         elif similarity >= 0.7:
+             interpretation = "moderate"
+         elif similarity >= 0.6:
+             interpretation = "low"
+         else:
+             interpretation = "very low"
+
+         if passed:
+             message = (
+                 f"Output is semantically similar to expected "
+                 f"(similarity: {similarity:.3f}, {interpretation})"
+             )
+         else:
+             message = (
+                 f"Output is not semantically similar enough "
+                 f"(similarity: {similarity:.3f} < threshold: {self.threshold}, {interpretation})"
+             )
+
+         return AssertionResult(
+             passed=passed,
+             assertion_type="semantic_similarity",
+             message=message,
+             score=similarity,
+             expected=self.expected_text,
+             actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
+             details={
+                 "similarity": similarity,
+                 "threshold": self.threshold,
+                 "interpretation": interpretation,
+                 "model": self.model_name,
+             },
+         )
+
+     @classmethod
+     def from_config(cls, config: dict[str, Any]) -> SemanticSimilarityAssertion:
+         """Create from configuration.
+
+         Config format:
+             {
+                 "expected_text": "The expected output",
+                 "threshold": 0.8,  # optional, default: 0.8
+                 "model_name": "all-MiniLM-L6-v2"  # optional
+             }
+         """
+         if "expected_text" not in config:
+             raise ValueError("SemanticSimilarityAssertion requires 'expected_text' in config")
+
+         return cls(
+             expected_text=config["expected_text"],
+             threshold=config.get("threshold", 0.8),
+             model_name=config.get("model_name", "all-MiniLM-L6-v2"),
+         )
+
+     def __repr__(self) -> str:
+         return (
+             f"SemanticSimilarityAssertion("
+             f"expected_text={self.expected_text[:30]!r}..., "
+             f"threshold={self.threshold}, "
+             f"model_name={self.model_name!r})"
+         )
+
+     @classmethod
+     def clear_cache(cls) -> None:
+         """Clear the embedding cache. Useful for testing or memory management."""
+         cls._embedding_cache.clear()
+
+     @classmethod
+     def get_cache_size(cls) -> int:
+         """Get the number of cached embeddings."""
+         return len(cls._embedding_cache)
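A short usage sketch for the class above, following the documented from_config format (values are illustrative; assumes AssertionResult exposes the passed/score fields it is constructed with):

    from prela.evals.assertions.semantic import SemanticSimilarityAssertion

    assertion = SemanticSimilarityAssertion.from_config({
        "expected_text": "The weather is nice today",
        "threshold": 0.75,  # optional, default 0.8
    })
    result = assertion.evaluate(
        output="Today has beautiful weather", expected=None, trace=None
    )
    print(result.passed, result.score)  # score is the raw cosine similarity

Because encode() is called with normalize_embeddings=True, the dot product in _cosine_similarity is exactly cosine similarity, so score is directly comparable against the 0-1 threshold.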
prela/evals/assertions/structural.py
@@ -0,0 +1,443 @@
+ """
+ Structural assertions for text and data format validation.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import re
+ from typing import Any
+
+ from prela.core.span import Span
+ from prela.evals.assertions.base import AssertionResult, BaseAssertion
+
+
+ class ContainsAssertion(BaseAssertion):
+     """Assert that output contains specified text.
+
+     Example:
+         >>> assertion = ContainsAssertion(text="error", case_sensitive=False)
+         >>> result = assertion.evaluate(output="Error occurred", expected=None, trace=None)
+         >>> assert result.passed
+     """
+
+     def __init__(self, text: str, case_sensitive: bool = True):
+         """Initialize contains assertion.
+
+         Args:
+             text: Text that must be present in output
+             case_sensitive: Whether to perform case-sensitive matching
+         """
+         self.text = text
+         self.case_sensitive = case_sensitive
+
+     def evaluate(
+         self,
+         output: Any,
+         expected: Any | None,
+         trace: list[Span] | None,
+     ) -> AssertionResult:
+         """Check if output contains the specified text."""
+         output_str = str(output)
+         text = self.text
+
+         if not self.case_sensitive:
+             output_str = output_str.lower()
+             text = text.lower()
+
+         passed = text in output_str
+
+         if passed:
+             message = f"Output contains '{self.text}'"
+         else:
+             message = f"Output does not contain '{self.text}'"
+
+         return AssertionResult(
+             passed=passed,
+             assertion_type="contains",
+             message=message,
+             expected=self.text,
+             actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
+             details={"case_sensitive": self.case_sensitive},
+         )
+
+     @classmethod
+     def from_config(cls, config: dict[str, Any]) -> ContainsAssertion:
+         """Create from configuration.
+
+         Config format:
+             {
+                 "text": "required text",
+                 "case_sensitive": true  # optional, default: true
+             }
+         """
+         if "text" not in config:
+             raise ValueError("ContainsAssertion requires 'text' in config")
+
+         return cls(
+             text=config["text"],
+             case_sensitive=config.get("case_sensitive", True),
+         )
+
+     def __repr__(self) -> str:
+         return f"ContainsAssertion(text={self.text!r}, case_sensitive={self.case_sensitive})"
+
+
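NotContainsAssertion below shares the same constructor and config shape; a brief config-driven sketch for the pair (values are illustrative):

    allow = ContainsAssertion.from_config({"text": "refund", "case_sensitive": False})
    deny = NotContainsAssertion.from_config({"text": "as an AI"})

    assert allow.evaluate(output="Refund issued.", expected=None, trace=None).passed
    assert deny.evaluate(output="Refund issued.", expected=None, trace=None).passed

Note that with case_sensitive=False both the output and the needle are lowercased before the substring test, so the truncated actual preview in the result is lowercased as well.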
+ class NotContainsAssertion(BaseAssertion):
+     """Assert that output does NOT contain specified text.
+
+     Example:
+         >>> assertion = NotContainsAssertion(text="error")
+         >>> result = assertion.evaluate(output="Success!", expected=None, trace=None)
+         >>> assert result.passed
+     """
+
+     def __init__(self, text: str, case_sensitive: bool = True):
+         """Initialize not-contains assertion.
+
+         Args:
+             text: Text that must NOT be present in output
+             case_sensitive: Whether to perform case-sensitive matching
+         """
+         self.text = text
+         self.case_sensitive = case_sensitive
+
+     def evaluate(
+         self,
+         output: Any,
+         expected: Any | None,
+         trace: list[Span] | None,
+     ) -> AssertionResult:
+         """Check if output does not contain the specified text."""
+         output_str = str(output)
+         text = self.text
+
+         if not self.case_sensitive:
+             output_str = output_str.lower()
+             text = text.lower()
+
+         passed = text not in output_str
+
+         if passed:
+             message = f"Output correctly does not contain '{self.text}'"
+         else:
+             message = f"Output incorrectly contains '{self.text}'"
+
+         return AssertionResult(
+             passed=passed,
+             assertion_type="not_contains",
+             message=message,
+             expected=f"not containing '{self.text}'",
+             actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
+             details={"case_sensitive": self.case_sensitive},
+         )
+
+     @classmethod
+     def from_config(cls, config: dict[str, Any]) -> NotContainsAssertion:
+         """Create from configuration.
+
+         Config format:
+             {
+                 "text": "forbidden text",
+                 "case_sensitive": true  # optional, default: true
+             }
+         """
+         if "text" not in config:
+             raise ValueError("NotContainsAssertion requires 'text' in config")
+
+         return cls(
+             text=config["text"],
+             case_sensitive=config.get("case_sensitive", True),
+         )
+
+     def __repr__(self) -> str:
+         return f"NotContainsAssertion(text={self.text!r}, case_sensitive={self.case_sensitive})"
+
+
+ class RegexAssertion(BaseAssertion):
+     """Assert that output matches a regular expression pattern.
+
+     Example:
+         >>> assertion = RegexAssertion(pattern=r"\\d{3}-\\d{4}")
+         >>> result = assertion.evaluate(output="Call 555-1234", expected=None, trace=None)
+         >>> assert result.passed
+     """
+
+     def __init__(self, pattern: str, flags: int = 0):
+         """Initialize regex assertion.
+
+         Args:
+             pattern: Regular expression pattern to match
+             flags: Optional regex flags (e.g., re.IGNORECASE)
+         """
+         self.pattern = pattern
+         self.flags = flags
+         self._compiled = re.compile(pattern, flags)
+
+     def evaluate(
+         self,
+         output: Any,
+         expected: Any | None,
+         trace: list[Span] | None,
+     ) -> AssertionResult:
+         """Check if output matches the regex pattern."""
+         output_str = str(output)
+         match = self._compiled.search(output_str)
+         passed = match is not None
+
+         if passed:
+             matched_text = match.group(0) if match else ""
+             message = f"Output matches pattern '{self.pattern}' (matched: '{matched_text}')"
+             details = {
+                 "matched_text": matched_text,
+                 "match_start": match.start(),
+                 "match_end": match.end(),
+             }
+         else:
+             message = f"Output does not match pattern '{self.pattern}'"
+             details = {}
+
+         return AssertionResult(
+             passed=passed,
+             assertion_type="regex",
+             message=message,
+             expected=self.pattern,
+             actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
+             details=details,
+         )
+
+     @classmethod
+     def from_config(cls, config: dict[str, Any]) -> RegexAssertion:
+         """Create from configuration.
+
+         Config format:
+             {
+                 "pattern": "\\d{3}-\\d{4}",
+                 "flags": 2  # optional, e.g., re.IGNORECASE
+             }
+         """
+         if "pattern" not in config:
+             raise ValueError("RegexAssertion requires 'pattern' in config")
+
+         return cls(
+             pattern=config["pattern"],
+             flags=config.get("flags", 0),
+         )
+
+     def __repr__(self) -> str:
+         return f"RegexAssertion(pattern={self.pattern!r}, flags={self.flags})"
+
+
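One subtlety in RegexAssertion.from_config above: "flags" is carried as a plain int, so JSON or YAML configs must use the flag's numeric value (re.IGNORECASE is 2), while direct construction can pass the constant itself. An equivalence sketch, reusing the phone-number pattern from the docstring:

    import re

    a1 = RegexAssertion(pattern=r"\d{3}-\d{4}", flags=re.IGNORECASE)
    a2 = RegexAssertion.from_config({"pattern": "\\d{3}-\\d{4}", "flags": 2})

    assert a1.evaluate(output="Call 555-1234", expected=None, trace=None).passed
    assert a2.evaluate(output="Call 555-1234", expected=None, trace=None).passed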
+ class LengthAssertion(BaseAssertion):
+     """Assert that output length is within specified bounds.
+
+     Example:
+         >>> assertion = LengthAssertion(min_length=10, max_length=100)
+         >>> result = assertion.evaluate(output="Hello, world!", expected=None, trace=None)
+         >>> assert result.passed
+     """
+
+     def __init__(self, min_length: int | None = None, max_length: int | None = None):
+         """Initialize length assertion.
+
+         Args:
+             min_length: Minimum acceptable length (inclusive)
+             max_length: Maximum acceptable length (inclusive)
+
+         Raises:
+             ValueError: If both min_length and max_length are None
+         """
+         if min_length is None and max_length is None:
+             raise ValueError("At least one of min_length or max_length must be specified")
+
+         if min_length is not None and min_length < 0:
+             raise ValueError("min_length must be non-negative")
+
+         if max_length is not None and max_length < 0:
+             raise ValueError("max_length must be non-negative")
+
+         if min_length is not None and max_length is not None and min_length > max_length:
+             raise ValueError("min_length cannot be greater than max_length")
+
+         self.min_length = min_length
+         self.max_length = max_length
+
+     def evaluate(
+         self,
+         output: Any,
+         expected: Any | None,
+         trace: list[Span] | None,
+     ) -> AssertionResult:
+         """Check if output length is within bounds."""
+         output_str = str(output)
+         actual_length = len(output_str)
+
+         passed = True
+         reasons = []
+
+         if self.min_length is not None and actual_length < self.min_length:
+             passed = False
+             reasons.append(f"too short (< {self.min_length})")
+
+         if self.max_length is not None and actual_length > self.max_length:
+             passed = False
+             reasons.append(f"too long (> {self.max_length})")
+
+         if passed:
+             if self.min_length is not None and self.max_length is not None:
+                 message = f"Output length {actual_length} is within bounds [{self.min_length}, {self.max_length}]"
+             elif self.min_length is not None:
+                 message = f"Output length {actual_length} is >= {self.min_length}"
+             else:
+                 message = f"Output length {actual_length} is <= {self.max_length}"
+         else:
+             message = f"Output length {actual_length} is {', '.join(reasons)}"
+
+         expected_desc = []
+         if self.min_length is not None:
+             expected_desc.append(f"min: {self.min_length}")
+         if self.max_length is not None:
+             expected_desc.append(f"max: {self.max_length}")
+
+         return AssertionResult(
+             passed=passed,
+             assertion_type="length",
+             message=message,
+             expected=", ".join(expected_desc),
+             actual=actual_length,
+             details={
+                 "min_length": self.min_length,
+                 "max_length": self.max_length,
+             },
+         )
+
+     @classmethod
+     def from_config(cls, config: dict[str, Any]) -> LengthAssertion:
+         """Create from configuration.
+
+         Config format:
+             {
+                 "min_length": 10,  # optional
+                 "max_length": 100  # optional
+             }
+         """
+         return cls(
+             min_length=config.get("min_length"),
+             max_length=config.get("max_length"),
+         )
+
+     def __repr__(self) -> str:
+         return f"LengthAssertion(min_length={self.min_length}, max_length={self.max_length})"
+
+
+ class JSONValidAssertion(BaseAssertion):
+     """Assert that output is valid JSON, optionally matching a schema.
+
+     Example:
+         >>> assertion = JSONValidAssertion()
+         >>> result = assertion.evaluate(output='{"key": "value"}', expected=None, trace=None)
+         >>> assert result.passed
+     """
+
+     def __init__(self, schema: dict[str, Any] | None = None):
+         """Initialize JSON validation assertion.
+
+         Args:
+             schema: Optional JSON schema to validate against (using jsonschema library)
+         """
+         self.schema = schema
+
+         # Only import jsonschema if schema validation is requested
+         if schema is not None:
+             try:
+                 import jsonschema
+
+                 self._validator = jsonschema.Draft7Validator(schema)
+             except ImportError:
+                 raise ImportError(
+                     "jsonschema library required for schema validation. "
+                     "Install with: pip install jsonschema"
+                 )
+         else:
+             self._validator = None
+
+     def evaluate(
+         self,
+         output: Any,
+         expected: Any | None,
+         trace: list[Span] | None,
+     ) -> AssertionResult:
+         """Check if output is valid JSON and optionally matches schema."""
+         output_str = str(output)
+
+         # First, check if it's valid JSON
+         try:
+             parsed = json.loads(output_str)
+         except json.JSONDecodeError as e:
+             return AssertionResult(
+                 passed=False,
+                 assertion_type="json_valid",
+                 message=f"Output is not valid JSON: {e.msg}",
+                 expected="valid JSON",
+                 actual=output_str[:100] + "..." if len(output_str) > 100 else output_str,
+                 details={"error": str(e), "position": e.pos},
+             )
+
+         # If no schema, we're done
+         if self._validator is None:
+             return AssertionResult(
+                 passed=True,
+                 assertion_type="json_valid",
+                 message="Output is valid JSON",
+                 expected="valid JSON",
+                 actual=parsed,
+                 details={"type": type(parsed).__name__},
+             )
+
+         # Validate against schema
+         errors = list(self._validator.iter_errors(parsed))
+
+         if not errors:
+             return AssertionResult(
+                 passed=True,
+                 assertion_type="json_valid",
+                 message="Output is valid JSON and matches schema",
+                 expected="valid JSON matching schema",
+                 actual=parsed,
+                 details={"schema_valid": True},
+             )
+         else:
+             error_messages = [f"{e.json_path}: {e.message}" for e in errors[:3]]
+             if len(errors) > 3:
+                 error_messages.append(f"... and {len(errors) - 3} more errors")
+
+             return AssertionResult(
+                 passed=False,
+                 assertion_type="json_valid",
+                 message=f"Output is valid JSON but does not match schema: {'; '.join(error_messages)}",
+                 expected="valid JSON matching schema",
+                 actual=parsed,
+                 details={
+                     "schema_valid": False,
+                     "error_count": len(errors),
+                     "errors": error_messages,
+                 },
+             )
+
+     @classmethod
+     def from_config(cls, config: dict[str, Any]) -> JSONValidAssertion:
+         """Create from configuration.
+
+         Config format:
+             {
+                 "schema": {  # optional
+                     "type": "object",
+                     "properties": {
+                         "name": {"type": "string"}
+                     }
+                 }
+             }
+         """
+         return cls(schema=config.get("schema"))
+
+     def __repr__(self) -> str:
+         return f"JSONValidAssertion(schema={self.schema is not None})"