prela 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/evals/suite.py ADDED
@@ -0,0 +1,316 @@
1
+ """Eval suite for organizing and managing test cases.
2
+
3
+ This module provides the EvalSuite class for organizing multiple eval cases,
4
+ with support for YAML serialization, setup/teardown hooks, and default assertions.
5
+
6
+ Example:
7
+ >>> from prela.evals import EvalSuite, EvalCase, EvalInput, EvalExpected
8
+ >>> suite = EvalSuite(
9
+ ... name="RAG Quality Suite",
10
+ ... description="Tests for RAG pipeline quality",
11
+ ... cases=[
12
+ ... EvalCase(
13
+ ... id="test_basic_qa",
14
+ ... name="Basic QA test",
15
+ ... input=EvalInput(query="What is 2+2?"),
16
+ ... expected=EvalExpected(contains=["4"])
17
+ ... )
18
+ ... ]
19
+ ... )
20
+ >>> suite.to_yaml("suite.yaml")
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+ from typing import Any, Callable
28
+
29
+ try:
30
+ import yaml
31
+
32
+ YAML_AVAILABLE = True
33
+ except ImportError:
34
+ YAML_AVAILABLE = False
35
+
36
+ from prela.evals.case import EvalCase
37
+
38
+
39
@dataclass
class EvalSuite:
    """Collection of eval cases with shared configuration.

    An eval suite organizes multiple test cases with:
    - Shared setup/teardown hooks
    - Default assertions applied to all cases
    - YAML serialization for easy configuration
    - Tagging and filtering capabilities

    Attributes:
        name: Suite name (e.g., "RAG Quality Suite")
        description: Human-readable description of what this suite tests
        cases: List of eval cases in this suite
        default_assertions: Assertions applied to all cases (unless overridden)
        setup: Callable run before executing the suite (e.g., start services)
        teardown: Callable run after executing the suite (e.g., cleanup)
        metadata: Additional metadata for the suite

    Example:
        >>> suite = EvalSuite(
        ...     name="RAG Quality Suite",
        ...     description="Tests for RAG pipeline quality",
        ...     cases=[
        ...         EvalCase(
        ...             id="test_basic_qa",
        ...             name="Basic factual question",
        ...             input=EvalInput(query="What is the capital of France?"),
        ...             expected=EvalExpected(contains=["Paris"])
        ...         )
        ...     ],
        ...     default_assertions=[
        ...         {"type": "latency", "max_ms": 5000},
        ...         {"type": "no_errors"}
        ...     ]
        ... )
    """

    name: str
    description: str = ""
    cases: list[EvalCase] = field(default_factory=list)
    default_assertions: list[dict[str, Any]] | None = None
    setup: Callable[[], None] | None = None
    teardown: Callable[[], None] | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate suite configuration.

        Raises:
            ValueError: If ``name`` is empty.
        """
        if not self.name:
            raise ValueError("EvalSuite must have a non-empty 'name'")

    def add_case(self, case: EvalCase) -> None:
        """Add a test case to the suite.

        Args:
            case: Eval case to add

        Example:
            >>> suite = EvalSuite(name="My Suite")
            >>> case = EvalCase(
            ...     id="test_1",
            ...     name="Test",
            ...     input=EvalInput(query="Hello"),
            ...     expected=EvalExpected(contains=["Hi"])
            ... )
            >>> suite.add_case(case)
        """
        self.cases.append(case)

    def get_case(self, case_id: str) -> EvalCase | None:
        """Get a test case by ID.

        Args:
            case_id: ID of the test case to retrieve

        Returns:
            The first case whose ``id`` equals ``case_id``, or None if no
            case matches.

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[...])
            >>> case = suite.get_case("test_basic_qa")
        """
        for case in self.cases:
            if case.id == case_id:
                return case
        return None

    def filter_by_tags(self, tags: list[str]) -> list[EvalCase]:
        """Filter test cases by tags.

        Returns cases that have ALL specified tags. An empty ``tags`` list
        therefore matches every case.

        Args:
            tags: List of tags to filter by

        Returns:
            List of matching test cases, in suite order

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[...])
            >>> qa_cases = suite.filter_by_tags(["qa"])
            >>> geography_qa = suite.filter_by_tags(["qa", "geography"])
        """
        return [case for case in self.cases if all(tag in case.tags for tag in tags)]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalSuite:
        """Create EvalSuite from dictionary.

        Optional keys that are missing or explicitly ``None`` (as produced by
        an empty YAML value such as ``metadata:``) fall back to their empty
        defaults instead of failing later.

        Args:
            data: Dictionary with suite specification

        Returns:
            EvalSuite instance

        Raises:
            KeyError: If ``data`` has no "name" key.
            ValueError: If the name is empty.

        Example:
            >>> data = {
            ...     "name": "My Suite",
            ...     "description": "Test suite",
            ...     "cases": [
            ...         {
            ...             "id": "test_1",
            ...             "name": "Test",
            ...             "input": {"query": "Hello"},
            ...             "expected": {"contains": ["Hi"]}
            ...         }
            ...     ]
            ... }
            >>> suite = EvalSuite.from_dict(data)
        """
        # `or` (rather than a .get() default) also covers keys that are
        # present but null, which .get("cases", []) would pass through as None.
        cases = [EvalCase.from_dict(case_data) for case_data in data.get("cases") or []]

        return cls(
            name=data["name"],
            description=data.get("description") or "",
            cases=cases,
            default_assertions=data.get("default_assertions"),
            metadata=data.get("metadata") or {},
            # Note: setup/teardown can't be serialized, only set programmatically
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        Only "name" is always present; empty or unset fields are omitted.

        Returns:
            Dictionary representation of the suite.

        Note:
            setup and teardown callables are not serialized.

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[...])
            >>> data = suite.to_dict()
            >>> data["name"]
            'My Suite'
        """
        result: dict[str, Any] = {"name": self.name}

        if self.description:
            result["description"] = self.description

        if self.cases:
            result["cases"] = [case.to_dict() for case in self.cases]

        if self.default_assertions:
            result["default_assertions"] = self.default_assertions

        if self.metadata:
            result["metadata"] = self.metadata

        return result

    @classmethod
    def from_yaml(cls, path: str | Path) -> EvalSuite:
        """Load eval suite from YAML file.

        Args:
            path: Path to YAML file

        Returns:
            EvalSuite instance

        Raises:
            ImportError: If PyYAML is not installed
            FileNotFoundError: If file doesn't exist
            yaml.YAMLError: If YAML parsing fails
            ValueError: If the file is empty or its top level is not a mapping

        Example:
            >>> suite = EvalSuite.from_yaml("tests/suite.yaml")
        """
        if not YAML_AVAILABLE:
            raise ImportError(
                "PyYAML is required for YAML support. "
                "Install with: pip install pyyaml"
            )

        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # safe_load yields None for an empty document and a list/scalar for
        # non-mapping documents; fail with a message that names the file
        # rather than an AttributeError from inside from_dict.
        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a YAML mapping at the top level of {path}, "
                f"got {type(data).__name__}"
            )

        return cls.from_dict(data)

    def to_yaml(self, path: str | Path) -> None:
        """Save eval suite to YAML file.

        Parent directories of ``path`` are created if they do not exist.

        Args:
            path: Path to save YAML file

        Raises:
            ImportError: If PyYAML is not installed

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[...])
            >>> suite.to_yaml("suite.yaml")
        """
        if not YAML_AVAILABLE:
            raise ImportError(
                "PyYAML is required for YAML support. "
                "Install with: pip install pyyaml"
            )

        path = Path(path)

        # Create parent directory if it doesn't exist
        path.parent.mkdir(parents=True, exist_ok=True)

        data = self.to_dict()

        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(
                data,
                f,
                default_flow_style=False,
                sort_keys=False,
                allow_unicode=True,
            )

    def __len__(self) -> int:
        """Return number of test cases in suite.

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[case1, case2])
            >>> len(suite)
            2
        """
        return len(self.cases)

    def __iter__(self):
        """Iterate over test cases.

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[case1, case2])
            >>> for case in suite:
            ...     print(case.name)
        """
        return iter(self.cases)

    def __getitem__(self, index: int) -> EvalCase:
        """Get test case by index.

        Args:
            index: Index of the test case

        Returns:
            EvalCase at the specified index

        Raises:
            IndexError: If ``index`` is out of range.

        Example:
            >>> suite = EvalSuite(name="My Suite", cases=[case1, case2])
            >>> first_case = suite[0]
        """
        return self.cases[index]
prela/exporters/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """Exporters for sending spans to external systems."""
2
+
3
+ from prela.exporters.base import BaseExporter, BatchExporter, ExportResult
4
+ from prela.exporters.console import ConsoleExporter
5
+ from prela.exporters.file import FileExporter
6
+ from prela.exporters.http import HTTPExporter
7
+ from prela.exporters.multi import MultiExporter
8
+
9
+ # OTLP exporter requires optional dependency
10
+ try:
11
+ from prela.exporters.otlp import OTLPExporter
12
+
13
+ OTLP_AVAILABLE = True
14
+ except ImportError:
15
+ OTLP_AVAILABLE = False
16
+ OTLPExporter = None # type: ignore
17
+
18
+ __all__ = [
19
+ "BaseExporter",
20
+ "BatchExporter",
21
+ "ExportResult",
22
+ "ConsoleExporter",
23
+ "FileExporter",
24
+ "HTTPExporter",
25
+ "MultiExporter",
26
+ "OTLPExporter",
27
+ ]
prela/exporters/base.py ADDED
@@ -0,0 +1,189 @@
1
+ """Base classes for span exporters.
2
+
3
+ This module provides abstract base classes for implementing span exporters.
4
+ Exporters are responsible for sending completed spans to external systems
5
+ like observability platforms, databases, or files.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import time
12
+ from abc import ABC, abstractmethod
13
+ from enum import Enum
14
+ from typing import Any
15
+
16
+ from prela.core.span import Span
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class ExportResult(Enum):
    """Outcome reported for one export operation.

    Each member's value is its lowercase name as a string.
    """

    # The export went through.
    SUCCESS = "success"
    # The export did not go through.
    FAILURE = "failure"
    # The export should be attempted again.
    RETRY = "retry"
27
+
28
+
29
class BaseExporter(ABC):
    """Interface that every span exporter implements.

    An exporter delivers spans to an external system. Concrete subclasses
    own serialization, any network requests, and error handling; this base
    class only fixes the two methods they must provide.
    """

    @abstractmethod
    def export(self, spans: list[Span]) -> None:
        """Send one batch of spans to the backend.

        Args:
            spans: The spans to export.

        Raises:
            Exception: When the export fails and should not be retried.
        """
        return None

    @abstractmethod
    def shutdown(self) -> None:
        """Stop the exporter, flushing whatever is still pending.

        Call this before the application exits so that all spans are
        properly exported.
        """
        return None
56
+
57
+
58
class BatchExporter(BaseExporter):
    """Base class for exporters that batch spans with retry logic.

    This class handles common batching concerns:
    - Retry with exponential backoff
    - Timeout handling
    - Error logging

    Subclasses only need to implement _do_export() to define how spans
    are actually sent to the backend.
    """

    def __init__(
        self,
        max_retries: int = 3,
        initial_backoff_ms: float = 100.0,
        max_backoff_ms: float = 10000.0,
        timeout_ms: float = 30000.0,
    ) -> None:
        """Initialize the batch exporter.

        Args:
            max_retries: Maximum number of retry attempts
            initial_backoff_ms: Initial backoff delay in milliseconds
            max_backoff_ms: Maximum backoff delay in milliseconds
            timeout_ms: Timeout for export operation in milliseconds
        """
        self.max_retries = max_retries
        self.initial_backoff_ms = initial_backoff_ms
        self.max_backoff_ms = max_backoff_ms
        self.timeout_ms = timeout_ms
        self._shutdown = False

    @abstractmethod
    def _do_export(self, spans: list[Span]) -> ExportResult:
        """Perform the actual export operation.

        This method should be implemented by subclasses to define how spans
        are sent to the backend system.

        Args:
            spans: List of spans to export

        Returns:
            ExportResult indicating success, failure, or retry needed
        """
        pass

    def export(self, spans: list[Span]) -> None:
        """Export spans with retry logic.

        Calls ``_do_export`` up to ``max_retries + 1`` times. A RETRY result
        or an exception raised by ``_do_export`` triggers a sleep of the
        current backoff (doubled after each wait, capped at
        ``max_backoff_ms``) before the next attempt. A FAILURE result is
        permanent and aborts immediately without further attempts.

        The timeout is checked only before each attempt, so a running
        attempt or backoff sleep is not interrupted by it.

        Args:
            spans: List of spans to export; an empty list is a no-op

        Raises:
            RuntimeError: If exporter is shutdown
            TimeoutError: If ``timeout_ms`` has elapsed before an attempt starts
            Exception: If ``_do_export`` reports FAILURE, if it still asks
                for a retry on the final attempt, or (re-raised unchanged)
                if it raises on the final attempt
        """
        if self._shutdown:
            raise RuntimeError("Cannot export: exporter is shutdown")

        if not spans:
            return

        start_time = time.perf_counter()
        backoff_ms = self.initial_backoff_ms
        total_attempts = self.max_retries + 1

        for attempt in range(1, total_attempts + 1):
            # Check timeout
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            if elapsed_ms >= self.timeout_ms:
                raise TimeoutError(
                    f"Export timeout after {elapsed_ms:.2f}ms " f"(limit: {self.timeout_ms}ms)"
                )

            is_last_attempt = attempt == total_attempts

            try:
                result = self._do_export(spans)
            except Exception as e:
                if is_last_attempt:
                    logger.error(
                        "Export failed after %d attempts: %s",
                        attempt,
                        str(e),
                    )
                    raise

                logger.warning(
                    "Export failed (attempt %d/%d): %s, backing off %.2fms",
                    attempt,
                    total_attempts,
                    str(e),
                    backoff_ms,
                )
            else:
                # Result handling lives in `else` so that the permanent-failure
                # exception below is NOT caught by the retry handler above.
                if result == ExportResult.SUCCESS:
                    logger.debug(
                        "Successfully exported %d spans on attempt %d",
                        len(spans),
                        attempt,
                    )
                    return

                if result == ExportResult.FAILURE:
                    logger.error("Export failed permanently on attempt %d", attempt)
                    raise Exception(f"Export failed permanently on attempt {attempt}")

                # Anything else is treated as ExportResult.RETRY.
                if is_last_attempt:
                    break

                logger.warning(
                    "Export needs retry (attempt %d/%d), backing off %.2fms",
                    attempt,
                    total_attempts,
                    backoff_ms,
                )

            # Shared backoff for both retry paths (RETRY result or exception).
            time.sleep(backoff_ms / 1000)
            backoff_ms = min(backoff_ms * 2, self.max_backoff_ms)

        raise Exception(f"Export failed after {self.max_retries + 1} attempts")

    def shutdown(self) -> None:
        """Shutdown the exporter.

        Marks the exporter as shut down; later ``export`` calls raise
        RuntimeError. Subclasses can override this to implement custom
        shutdown logic like flushing buffers or closing connections.
        """
        self._shutdown = True
        logger.info("Exporter shutdown")