prela 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/core/tracer.py
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
"""Tracer for creating and managing spans."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import functools
|
|
7
|
+
import inspect
|
|
8
|
+
import uuid
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from typing import Any, Callable, Iterator, TypeVar
|
|
11
|
+
|
|
12
|
+
from prela.core.clock import now
|
|
13
|
+
from prela.core.context import (
|
|
14
|
+
TraceContext,
|
|
15
|
+
get_current_context,
|
|
16
|
+
reset_context,
|
|
17
|
+
set_context,
|
|
18
|
+
)
|
|
19
|
+
from prela.core.sampler import AlwaysOnSampler, BaseSampler
|
|
20
|
+
from prela.core.span import Span, SpanStatus, SpanType
|
|
21
|
+
from prela.exporters.base import BaseExporter
|
|
22
|
+
|
|
23
|
+
# Type variable for preserving function signatures
|
|
24
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
25
|
+
|
|
26
|
+
# Global tracer instance
|
|
27
|
+
_global_tracer: Tracer | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Tracer:
    """
    Main tracer for creating and managing spans.

    The Tracer is responsible for:
    - Creating spans with proper trace/span IDs
    - Managing trace context and span hierarchies
    - Applying sampling decisions
    - Exporting completed spans

    Example:
        ```python
        from prela.core.tracer import Tracer
        from prela.exporters.console import ConsoleExporter

        tracer = Tracer(
            service_name="my-agent",
            exporter=ConsoleExporter()
        )

        # Create spans using context manager
        with tracer.span("operation") as span:
            span.set_attribute("key", "value")
            # Nested spans inherit trace context
            with tracer.span("sub-operation") as child:
                child.set_attribute("nested", True)
        ```
    """

    def __init__(
        self,
        service_name: str = "default",
        exporter: BaseExporter | None = None,
        sampler: BaseSampler | None = None,
        capture_for_replay: bool = False,
    ) -> None:
        """
        Initialize a tracer.

        Args:
            service_name: Name of the service (added to all spans as service.name)
            exporter: Exporter for sending spans to backend (None = no export)
            sampler: Sampler for controlling trace volume (default: AlwaysOnSampler)
            capture_for_replay: If True, capture full replay data (default: False).
                This class only stores the flag; it is not read by any method here.
        """
        self.service_name = service_name
        self.exporter = exporter
        self.sampler = sampler or AlwaysOnSampler()
        self.capture_for_replay = capture_for_replay

    def _enter_trace(self) -> tuple[TraceContext, Any, str, bool]:
        """
        Return the active trace context, starting a new trace if none exists.

        Returns:
            A ``(ctx, token, trace_id, sampled)`` tuple. ``token`` is the value
            returned by ``set_context`` when a new context was installed, and
            ``None`` when an existing context is being continued.
        """
        ctx = get_current_context()
        if ctx is not None:
            # Continue existing trace: inherit its id and sampling decision.
            return ctx, None, ctx.trace_id, ctx.sampled

        # Start new trace: the sampling decision is made once, at the root.
        trace_id = str(uuid.uuid4())
        sampled = self.sampler.should_sample(trace_id)
        ctx = TraceContext(trace_id=trace_id, sampled=sampled)
        return ctx, set_context(ctx), trace_id, sampled

    def _build_span(
        self,
        trace_id: str,
        parent_span: Span | None,
        name: str,
        span_type: SpanType,
        attributes: dict[str, Any] | None,
    ) -> Span:
        """
        Create a started span tagged with this tracer's service name.

        The span is NOT pushed onto the context; the caller does that.
        """
        span = Span(
            span_id=str(uuid.uuid4()),
            trace_id=trace_id,
            parent_span_id=parent_span.span_id if parent_span else None,
            name=name,
            span_type=span_type,
            started_at=now(),
            # Defensive copy: the span gets attributes added to it below and by
            # callers, so it must not share the caller's dict (for example the
            # single dict a decorator reuses for every invocation).
            attributes=dict(attributes) if attributes else {},
        )
        span.set_attribute("service.name", self.service_name)
        return span

    @contextmanager
    def span(
        self,
        name: str,
        span_type: SpanType = SpanType.CUSTOM,
        attributes: dict[str, Any] | None = None,
    ) -> Iterator[Span]:
        """
        Create a new span as a context manager.

        The span is automatically:
        - Started when entering the context
        - Ended when exiting the context
        - Exported (together with every other span collected on the trace
          context) if it's a root span, the trace is sampled and an exporter
          is configured
        - Linked to parent span if one exists in current context

        Exceptions derived from ``Exception`` are recorded on the span and
        re-raised. Other ``BaseException`` subclasses are not recorded, but the
        span is still ended and cleaned up.

        Args:
            name: Name of the span (e.g., "process_request", "llm_call")
            span_type: Type of operation (LLM, TOOL, AGENT, etc.)
            attributes: Initial attributes to set on the span (copied, never
                mutated)

        Yields:
            Span: The created span (can be used to add attributes/events)

        Example:
            ```python
            with tracer.span("database_query", SpanType.CUSTOM) as span:
                span.set_attribute("query", "SELECT * FROM users")
                result = execute_query()
                span.set_attribute("row_count", len(result))
            ```
        """
        ctx, token, trace_id, sampled = self._enter_trace()
        parent_span = ctx.current_span()
        span = self._build_span(trace_id, parent_span, name, span_type, attributes)

        # Push to context so nested spans see this one as their parent.
        ctx.push_span(span)

        try:
            yield span
        except Exception as e:
            # Capture exception details on the span, then let it propagate.
            span.set_status(SpanStatus.ERROR, str(e))
            span.set_attribute("error.type", type(e).__name__)
            span.set_attribute("error.message", str(e))
            raise
        finally:
            try:
                span.end()
                ctx.pop_span()
                ctx.add_completed_span(span)

                # Export if sampled and this was a root span. ALL spans in the
                # trace are exported, not just the root.
                if sampled and parent_span is None and self.exporter:
                    self.exporter.export(ctx.all_spans)
            finally:
                # Reset the context if we created it. This runs even when the
                # bookkeeping or the exporter above raises, so a failing export
                # cannot leave a finished trace installed as the current one.
                if token is not None:
                    reset_context(token)

    def start_span(
        self,
        name: str,
        span_type: SpanType = SpanType.CUSTOM,
        attributes: dict[str, Any] | None = None,
    ) -> Span:
        """
        Create a new span without using a context manager.

        Unlike the span() context manager, this method returns a Span
        object that must be manually ended by calling span.end().
        This is useful for instrumentations where the span lifetime
        cannot be expressed as a context manager.

        This method attaches the tracer, the context token (only when this call
        started a new trace) and the sampling decision to the span as private
        attributes. Span.end() is expected to use them to (its implementation
        lives in prela.core.span and is not visible here):
        - Pop the span from the context stack when ended
        - Export it if it's a root span and sampling is enabled
        - Reset the context if it created a new trace

        Args:
            name: Name of the span (e.g., "process_request", "llm_call")
            span_type: Type of operation (LLM, TOOL, AGENT, etc.)
            attributes: Initial attributes to set on the span (copied, never
                mutated)

        Returns:
            Span: The created span (must call .end() when done)

        Example:
            ```python
            span = tracer.start_span("llm_call", SpanType.LLM)
            span.set_attribute("model", "gpt-4")
            try:
                # Do work
                result = call_llm()
                span.set_status(SpanStatus.SUCCESS)
            except Exception as e:
                span.set_status(SpanStatus.ERROR, str(e))
            finally:
                span.end()  # Automatically handles cleanup
            ```
        """
        ctx, context_token, trace_id, sampled = self._enter_trace()
        parent_span = ctx.current_span()
        span = self._build_span(trace_id, parent_span, name, span_type, attributes)

        # Store cleanup metadata on the span (using private attributes).
        # object.__setattr__ bypasses any custom __setattr__ defined on Span.
        # context_token is None unless this call created the trace context.
        object.__setattr__(span, "_tracer", self)
        object.__setattr__(span, "_context_token", context_token)
        object.__setattr__(span, "_sampled", sampled)

        # Push to context
        ctx.push_span(span)

        return span

    def get_current_span(self) -> Span | None:
        """
        Get the currently active span from context.

        Returns:
            Span | None: The active span, or None if there is no trace context
            (or the context reports no current span)
        """
        ctx = get_current_context()
        return ctx.current_span() if ctx else None

    def set_global(self) -> None:
        """
        Set this tracer as the global default.

        After calling this, get_tracer() will return this tracer instance.
        This is useful for auto-instrumentation where instrumentors need
        access to a tracer without explicit passing.

        Example:
            ```python
            tracer = Tracer(service_name="my-app")
            tracer.set_global()

            # Later, from anywhere in the code
            from prela.core.tracer import get_tracer
            tracer = get_tracer()
            ```
        """
        global _global_tracer
        _global_tracer = self

    def shutdown(self) -> None:
        """
        Shutdown the tracer by shutting down its exporter, if one is configured.

        The exporter's shutdown() is expected to flush any pending spans, so
        this should be called before application shutdown.

        Example:
            ```python
            import atexit

            tracer = Tracer(service_name="my-app")
            atexit.register(tracer.shutdown)
            ```
        """
        if self.exporter:
            self.exporter.shutdown()
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def get_tracer() -> Tracer | None:
    """
    Return the process-wide tracer, if one has been registered.

    The value is whatever was last stored by Tracer.set_global() or
    set_global_tracer(); nothing is created on demand.

    Returns:
        Tracer | None: The global tracer, or None if no global tracer is set

    Example:
        ```python
        from prela.core.tracer import get_tracer

        tracer = get_tracer()
        if tracer:
            with tracer.span("operation") as span:
                span.set_attribute("key", "value")
        ```
    """
    # Read-only access to the module-level slot; no `global` statement needed.
    current = _global_tracer
    return current
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def set_global_tracer(tracer: Tracer) -> None:
    """
    Register ``tracer`` as the process-wide default tracer.

    Function-style equivalent of calling tracer.set_global(): it overwrites
    the module-level slot that get_tracer() reads.

    Args:
        tracer: The tracer to set as global

    Example:
        ```python
        from prela.core.tracer import Tracer, set_global_tracer

        tracer = Tracer(service_name="my-app")
        set_global_tracer(tracer)
        ```
    """
    # Rebind the module-level name rather than creating a local one.
    global _global_tracer
    _global_tracer = tracer
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def trace(
    name: str | None = None,
    span_type: SpanType = SpanType.CUSTOM,
    attributes: dict[str, Any] | None = None,
    tracer: Tracer | None = None,
) -> Callable[[F], F]:
    """
    Decorator for automatically tracing function execution.

    This decorator wraps a function (sync or async) and creates a span
    for each invocation via ``Tracer.span()``. The span is automatically:
    - Created when the function is called
    - Ended when the function returns or raises
    - Exported if it's a root span (handled by the tracer)
    - Marked as ERROR when the function raises an ``Exception``

    Each span also gets ``function.name`` and ``function.module`` attributes.

    The decorator works with both synchronous and asynchronous functions.

    The tracer is resolved once, when the decorator is applied (not on every
    call), so a global tracer must already be set at decoration time unless
    ``tracer`` is passed explicitly.

    Args:
        name: Name of the span (default: function name)
        span_type: Type of operation (default: CUSTOM)
        attributes: Initial attributes to set on the span. The dict is copied
            for every call, so attributes added to one span never leak into
            the next invocation's span.
        tracer: Tracer instance to use (default: global tracer from init())

    Returns:
        Decorated function with automatic tracing

    Raises:
        RuntimeError: If no tracer is provided and no global tracer is set
            (raised when the decorator is applied to a function)

    Example with sync function:
        ```python
        import prela

        prela.init(service_name="my-app")

        @prela.trace("process_data")
        def process_data(items):
            # Function is automatically traced
            result = [item * 2 for item in items]
            return result

        # Each call creates a span
        result = process_data([1, 2, 3])
        ```

    Example with async function:
        ```python
        import prela
        import asyncio

        prela.init(service_name="my-app")

        @prela.trace("fetch_data", span_type=prela.SpanType.RETRIEVAL)
        async def fetch_data(url):
            # Async function is automatically traced
            await asyncio.sleep(0.1)
            return {"data": "example"}

        # Each call creates a span
        result = await fetch_data("https://api.example.com")
        ```

    Example with custom attributes:
        ```python
        @prela.trace(
            "database_query",
            span_type=prela.SpanType.CUSTOM,
            attributes={"db": "postgres", "table": "users"}
        )
        def query_users(limit=10):
            # Span has initial attributes set
            return fetch_users(limit)
        ```

    Example with manual attribute setting:
        ```python
        @prela.trace("calculate")
        def calculate(x, y):
            # Access current span to add more attributes
            span = prela.get_current_span()
            if span:
                span.set_attribute("x", x)
                span.set_attribute("y", y)
            result = x + y
            if span:
                span.set_attribute("result", result)
            return result
        ```

    Example using function-specific tracer:
        ```python
        my_tracer = Tracer(service_name="custom")

        @prela.trace("operation", tracer=my_tracer)
        def my_operation():
            pass
        ```
    """

    def decorator(func: F) -> F:
        # Determine span name
        span_name = name or func.__name__

        # Get the tracer to use: explicit argument first, then the global one.
        tracer_instance = tracer if tracer is not None else get_tracer()
        if tracer_instance is None:
            raise RuntimeError(
                "No global tracer set. Call prela.init() first or provide a tracer parameter."
            )

        def open_span() -> Any:
            # Hand every span its own copy of the initial attributes; the
            # decorator-level dict is shared by all calls and must stay pristine.
            return tracer_instance.span(
                span_name,
                span_type=span_type,
                attributes=dict(attributes) if attributes else None,
            )

        # inspect.iscoroutinefunction replaces asyncio.iscoroutinefunction,
        # which is deprecated since Python 3.14.
        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                with open_span() as span:
                    # Add function metadata
                    span.set_attribute("function.name", func.__name__)
                    span.set_attribute("function.module", func.__module__)

                    # Await inside the span so it covers the whole coroutine.
                    return await func(*args, **kwargs)

            return async_wrapper  # type: ignore

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            with open_span() as span:
                # Add function metadata
                span.set_attribute("function.name", func.__name__)
                span.set_attribute("function.module", func.__module__)

                # Execute the sync function
                return func(*args, **kwargs)

        return sync_wrapper  # type: ignore

    return decorator
|
prela/evals/__init__.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Evaluation framework for AI agents.
|
|
2
|
+
|
|
3
|
+
This module provides a comprehensive evaluation framework for testing AI agents:
|
|
4
|
+
- Test case definition (EvalInput, EvalExpected, EvalCase)
|
|
5
|
+
- Test suite management (EvalSuite)
|
|
6
|
+
- YAML-based configuration
|
|
7
|
+
- Assertion framework
|
|
8
|
+
- Result tracking
|
|
9
|
+
- n8n workflow evaluation
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
>>> from prela.evals import EvalSuite, EvalCase, EvalInput, EvalExpected
|
|
13
|
+
>>>
|
|
14
|
+
>>> # Define a test case
|
|
15
|
+
>>> case = EvalCase(
|
|
16
|
+
... id="test_qa",
|
|
17
|
+
... name="Basic QA test",
|
|
18
|
+
... input=EvalInput(query="What is the capital of France?"),
|
|
19
|
+
... expected=EvalExpected(contains=["Paris"]),
|
|
20
|
+
... assertions=[
|
|
21
|
+
... {"type": "contains", "value": "Paris"},
|
|
22
|
+
... {"type": "semantic_similarity", "threshold": 0.8}
|
|
23
|
+
... ]
|
|
24
|
+
... )
|
|
25
|
+
>>>
|
|
26
|
+
>>> # Create a suite
|
|
27
|
+
>>> suite = EvalSuite(
|
|
28
|
+
... name="Geography QA Suite",
|
|
29
|
+
... description="Tests for geography knowledge",
|
|
30
|
+
... cases=[case]
|
|
31
|
+
... )
|
|
32
|
+
>>>
|
|
33
|
+
>>> # Save to YAML
|
|
34
|
+
>>> suite.to_yaml("geography_qa.yaml")
|
|
35
|
+
>>>
|
|
36
|
+
>>> # Load from YAML
|
|
37
|
+
>>> loaded_suite = EvalSuite.from_yaml("geography_qa.yaml")
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
from prela.evals.case import EvalCase, EvalExpected, EvalInput
|
|
41
|
+
|
|
42
|
+
# n8n evaluation framework
|
|
43
|
+
from prela.evals.n8n import (
|
|
44
|
+
N8nEvalCase,
|
|
45
|
+
N8nWorkflowEvalConfig,
|
|
46
|
+
N8nWorkflowEvalRunner,
|
|
47
|
+
eval_n8n_workflow,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# n8n assertions (convenience re-export)
|
|
51
|
+
from prela.evals.n8n.assertions import (
|
|
52
|
+
duration_under,
|
|
53
|
+
node_completed,
|
|
54
|
+
node_output,
|
|
55
|
+
tokens_under,
|
|
56
|
+
workflow_completed,
|
|
57
|
+
workflow_status,
|
|
58
|
+
)
|
|
59
|
+
from prela.evals.reporters import ConsoleReporter, JSONReporter, JUnitReporter
|
|
60
|
+
from prela.evals.runner import (
|
|
61
|
+
CaseResult,
|
|
62
|
+
EvalRunResult,
|
|
63
|
+
EvalRunner,
|
|
64
|
+
create_assertion,
|
|
65
|
+
)
|
|
66
|
+
from prela.evals.suite import EvalSuite
|
|
67
|
+
|
|
68
|
+
__all__ = [
|
|
69
|
+
# Core eval framework
|
|
70
|
+
"EvalCase",
|
|
71
|
+
"EvalExpected",
|
|
72
|
+
"EvalInput",
|
|
73
|
+
"EvalSuite",
|
|
74
|
+
"CaseResult",
|
|
75
|
+
"EvalRunResult",
|
|
76
|
+
"EvalRunner",
|
|
77
|
+
"create_assertion",
|
|
78
|
+
# Reporters
|
|
79
|
+
"ConsoleReporter",
|
|
80
|
+
"JSONReporter",
|
|
81
|
+
"JUnitReporter",
|
|
82
|
+
# n8n workflow evaluation
|
|
83
|
+
"N8nEvalCase",
|
|
84
|
+
"N8nWorkflowEvalConfig",
|
|
85
|
+
"N8nWorkflowEvalRunner",
|
|
86
|
+
"eval_n8n_workflow",
|
|
87
|
+
# n8n assertions (convenience)
|
|
88
|
+
"node_completed",
|
|
89
|
+
"node_output",
|
|
90
|
+
"duration_under",
|
|
91
|
+
"tokens_under",
|
|
92
|
+
"workflow_completed",
|
|
93
|
+
"workflow_status",
|
|
94
|
+
]
|