prela-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/core/tracer.py ADDED
@@ -0,0 +1,498 @@
1
+ """Tracer for creating and managing spans."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import functools
7
+ import inspect
8
+ import uuid
9
+ from contextlib import contextmanager
10
+ from typing import Any, Callable, Iterator, TypeVar
11
+
12
+ from prela.core.clock import now
13
+ from prela.core.context import (
14
+ TraceContext,
15
+ get_current_context,
16
+ reset_context,
17
+ set_context,
18
+ )
19
+ from prela.core.sampler import AlwaysOnSampler, BaseSampler
20
+ from prela.core.span import Span, SpanStatus, SpanType
21
+ from prela.exporters.base import BaseExporter
22
+
23
+ # Type variable for preserving function signatures
24
+ F = TypeVar("F", bound=Callable[..., Any])
25
+
26
+ # Global tracer instance
27
+ _global_tracer: Tracer | None = None
28
+
29
+
30
class Tracer:
    """
    Main tracer for creating and managing spans.

    The Tracer is responsible for:
    - Creating spans with proper trace/span IDs
    - Managing trace context and span hierarchies
    - Applying sampling decisions
    - Exporting completed spans

    Example:
        ```python
        from prela.core.tracer import Tracer
        from prela.exporters.console import ConsoleExporter

        tracer = Tracer(
            service_name="my-agent",
            exporter=ConsoleExporter()
        )

        # Create spans using context manager
        with tracer.span("operation") as span:
            span.set_attribute("key", "value")
            # Nested spans inherit trace context
            with tracer.span("sub-operation") as child:
                child.set_attribute("nested", True)
        ```
    """

    def __init__(
        self,
        service_name: str = "default",
        exporter: BaseExporter | None = None,
        sampler: BaseSampler | None = None,
        capture_for_replay: bool = False,
    ):
        """
        Initialize a tracer.

        Args:
            service_name: Name of the service (added to all spans as service.name)
            exporter: Exporter for sending spans to backend (None = no export)
            sampler: Sampler for controlling trace volume (default: AlwaysOnSampler)
            capture_for_replay: Replay-capture flag (default: False). It is only
                stored on the tracer here; nothing in this class reads it.
        """
        self.service_name = service_name
        self.exporter = exporter
        self.sampler = sampler or AlwaysOnSampler()
        self.capture_for_replay = capture_for_replay

    def _enter_trace(self) -> tuple[TraceContext, Any, str, bool]:
        """
        Return the active trace context, installing a new one if none exists.

        Returns:
            A ``(ctx, token, trace_id, sampled)`` tuple. ``token`` is the value
            returned by ``set_context()`` when this call installed a new
            context, and ``None`` when an existing context was reused.
        """
        ctx = get_current_context()
        if ctx is not None:
            # Continue the existing trace and inherit its sampling decision.
            return ctx, None, ctx.trace_id, ctx.sampled

        # Start a new trace; the sampler is consulted once per trace.
        trace_id = str(uuid.uuid4())
        sampled = self.sampler.should_sample(trace_id)
        ctx = TraceContext(trace_id=trace_id, sampled=sampled)
        return ctx, set_context(ctx), trace_id, sampled

    def _new_span(
        self,
        ctx: TraceContext,
        trace_id: str,
        name: str,
        span_type: SpanType,
        attributes: dict[str, Any] | None,
    ) -> tuple[Span, Span | None]:
        """
        Build a span parented to the context's current span.

        The span is NOT pushed onto the context; callers do that themselves.

        Returns:
            A ``(span, parent_span)`` tuple; ``parent_span`` is ``None`` for a
            root span.
        """
        parent_span = ctx.current_span()
        span = Span(
            span_id=str(uuid.uuid4()),
            trace_id=trace_id,
            parent_span_id=parent_span.span_id if parent_span else None,
            name=name,
            span_type=span_type,
            started_at=now(),
            # Copy the mapping: Span may keep a reference to what it is given,
            # and "service.name" is written right below. Without the copy the
            # caller's dict (e.g. one shared by a decorator across calls)
            # would be mutated and shared between spans.
            attributes=dict(attributes) if attributes else {},
        )
        span.set_attribute("service.name", self.service_name)
        return span, parent_span

    @contextmanager
    def span(
        self,
        name: str,
        span_type: SpanType = SpanType.CUSTOM,
        attributes: dict[str, Any] | None = None,
    ) -> Iterator[Span]:
        """
        Create a new span as a context manager.

        The span is automatically:
        - Started when entering the context
        - Ended when exiting the context
        - Exported (together with every span collected on the trace context)
          if it is a root span, the trace is sampled and an exporter is set
        - Linked to parent span if one exists in current context

        Exceptions deriving from ``Exception`` are recorded on the span
        (ERROR status plus ``error.type`` / ``error.message``) and re-raised.

        Args:
            name: Name of the span (e.g., "process_request", "llm_call")
            span_type: Type of operation (LLM, TOOL, AGENT, etc.)
            attributes: Initial attributes to set on the span. The mapping is
                copied, so the caller's dict is never modified.

        Yields:
            Span: The created span (can be used to add attributes/events)

        Example:
            ```python
            with tracer.span("database_query", SpanType.CUSTOM) as span:
                span.set_attribute("query", "SELECT * FROM users")
                result = execute_query()
                span.set_attribute("row_count", len(result))
            ```
        """
        ctx, token, trace_id, sampled = self._enter_trace()
        try:
            span, parent_span = self._new_span(
                ctx, trace_id, name, span_type, attributes
            )
            ctx.push_span(span)
        except BaseException:
            # Do not leave a half-initialised context installed if the span
            # could not be created or pushed.
            if token is not None:
                reset_context(token)
            raise

        try:
            yield span
        except Exception as e:
            # Record the failure on the span, then let it propagate unchanged.
            span.set_status(SpanStatus.ERROR, str(e))
            span.set_attribute("error.type", type(e).__name__)
            span.set_attribute("error.message", str(e))
            raise
        finally:
            try:
                span.end()
                ctx.pop_span()
                ctx.add_completed_span(span)

                # Only the root span triggers an export, and it exports ALL
                # spans collected for the trace, not just itself.
                if sampled and parent_span is None and self.exporter:
                    self.exporter.export(ctx.all_spans)
            finally:
                # Always restore the previous context if this call installed
                # one, even when ending or exporting the span raised;
                # otherwise later spans would attach to a finished trace.
                if token is not None:
                    reset_context(token)

    def start_span(
        self,
        name: str,
        span_type: SpanType = SpanType.CUSTOM,
        attributes: dict[str, Any] | None = None,
    ) -> Span:
        """
        Create a new span without using a context manager.

        Unlike the span() context manager, this method returns a Span
        object that must be manually ended by calling span.end().
        This is useful for instrumentations where the span lifetime
        cannot be expressed as a context manager.

        This method only creates the span, tags it with cleanup metadata
        (``_tracer``, ``_context_token``, ``_sampled``) and pushes it onto the
        context. Popping, exporting and resetting the context are not done
        here; they are presumably performed by ``Span.end()`` using that
        metadata (NOTE(review): confirm against prela.core.span).

        Args:
            name: Name of the span (e.g., "process_request", "llm_call")
            span_type: Type of operation (LLM, TOOL, AGENT, etc.)
            attributes: Initial attributes to set on the span. The mapping is
                copied, so the caller's dict is never modified.

        Returns:
            Span: The created span (must call .end() when done)

        Example:
            ```python
            span = tracer.start_span("llm_call", SpanType.LLM)
            span.set_attribute("model", "gpt-4")
            try:
                # Do work
                result = call_llm()
                span.set_status(SpanStatus.SUCCESS)
            except Exception as e:
                span.set_status(SpanStatus.ERROR, str(e))
            finally:
                span.end()
            ```
        """
        ctx, token, trace_id, sampled = self._enter_trace()
        try:
            span, _parent_span = self._new_span(
                ctx, trace_id, name, span_type, attributes
            )

            # Attach cleanup metadata as private attributes. object.__setattr__
            # is used so the assignment does not go through any custom
            # __setattr__ defined on Span. The token is None unless this call
            # installed a new trace context.
            object.__setattr__(span, "_tracer", self)
            object.__setattr__(span, "_context_token", token)
            object.__setattr__(span, "_sampled", sampled)

            ctx.push_span(span)
        except BaseException:
            # Undo the context installation if the span never became active.
            if token is not None:
                reset_context(token)
            raise

        return span

    def get_current_span(self) -> Span | None:
        """
        Get the currently active span from context.

        Returns:
            Span | None: The active span, or None if no span is active
        """
        ctx = get_current_context()
        return ctx.current_span() if ctx else None

    def set_global(self) -> None:
        """
        Set this tracer as the global default.

        After calling this, get_tracer() will return this tracer instance.
        This is useful for auto-instrumentation where instrumentors need
        access to a tracer without explicit passing.

        Example:
            ```python
            tracer = Tracer(service_name="my-app")
            tracer.set_global()

            # Later, from anywhere in the code
            from prela.core.tracer import get_tracer
            tracer = get_tracer()
            ```
        """
        global _global_tracer
        _global_tracer = self

    def shutdown(self) -> None:
        """
        Shut down the tracer by shutting down its exporter.

        Delegates to ``exporter.shutdown()`` when an exporter is configured
        and does nothing otherwise. Intended to be called before the
        application exits.

        Example:
            ```python
            import atexit

            tracer = Tracer(service_name="my-app")
            atexit.register(tracer.shutdown)
            ```
        """
        if self.exporter:
            self.exporter.shutdown()
309
+
310
+
311
def get_tracer() -> Tracer | None:
    """
    Return the process-wide tracer, if one has been registered.

    Returns:
        Tracer | None: The tracer stored in the module-level global, or
        None when no tracer has been registered yet.

    Example:
        ```python
        from prela.core.tracer import get_tracer

        tracer = get_tracer()
        if tracer:
            with tracer.span("operation") as span:
                span.set_attribute("key", "value")
        ```
    """
    # Read-only access to the module global; no `global` statement is needed.
    current = _global_tracer
    return current
329
+
330
+
331
def set_global_tracer(tracer: Tracer) -> None:
    """
    Register ``tracer`` as the process-wide tracer.

    Function-style equivalent of calling ``tracer.set_global()``: both
    rebind the same module-level global that ``get_tracer()`` returns.

    Args:
        tracer: The tracer to register as the global instance

    Example:
        ```python
        from prela.core.tracer import Tracer, set_global_tracer

        tracer = Tracer(service_name="my-app")
        set_global_tracer(tracer)
        ```
    """
    # Rebinding (not mutating) the module-level name requires `global`.
    global _global_tracer
    _global_tracer = tracer
350
+
351
+
352
def trace(
    name: str | None = None,
    span_type: SpanType = SpanType.CUSTOM,
    attributes: dict[str, Any] | None = None,
    tracer: Tracer | None = None,
) -> Callable[[F], F]:
    """
    Decorator for automatically tracing function execution.

    This decorator wraps a function (sync or async) so that every invocation
    runs inside ``tracer.span(...)``. Span start/end, parent linking, export
    and exception capture are therefore whatever ``Tracer.span()`` does; this
    decorator additionally sets ``function.name`` and ``function.module`` on
    each span.

    The tracer is resolved once, when the decorator is applied to the
    function (not on each call).

    Args:
        name: Name of the span (default: function name)
        span_type: Type of operation (default: CUSTOM)
        attributes: Initial attributes to set on the span. A fresh copy is
            handed to every span, so the dict passed here is never mutated
            and is not shared between invocations.
        tracer: Tracer instance to use (default: the global tracer returned
            by get_tracer() at decoration time)

    Returns:
        Decorated function with automatic tracing

    Raises:
        RuntimeError: When the decorator is applied while no tracer is
            provided and no global tracer is set

    Example with sync function:
        ```python
        import prela

        prela.init(service_name="my-app")

        @prela.trace("process_data")
        def process_data(items):
            # Function is automatically traced
            result = [item * 2 for item in items]
            return result

        # Each call creates a span
        result = process_data([1, 2, 3])
        ```

    Example with async function:
        ```python
        import prela
        import asyncio

        prela.init(service_name="my-app")

        @prela.trace("fetch_data", span_type=prela.SpanType.RETRIEVAL)
        async def fetch_data(url):
            # Async function is automatically traced
            await asyncio.sleep(0.1)
            return {"data": "example"}

        # Each call creates a span
        result = await fetch_data("https://api.example.com")
        ```

    Example with custom attributes:
        ```python
        @prela.trace(
            "database_query",
            span_type=prela.SpanType.CUSTOM,
            attributes={"db": "postgres", "table": "users"}
        )
        def query_users(limit=10):
            # Span has initial attributes set
            return fetch_users(limit)
        ```

    Example with manual attribute setting:
        ```python
        @prela.trace("calculate")
        def calculate(x, y):
            # Access current span to add more attributes
            span = prela.get_current_span()
            if span:
                span.set_attribute("x", x)
                span.set_attribute("y", y)
            result = x + y
            if span:
                span.set_attribute("result", result)
            return result
        ```

    Example using function-specific tracer:
        ```python
        my_tracer = Tracer(service_name="custom")

        @prela.trace("operation", tracer=my_tracer)
        def my_operation():
            pass
        ```
    """

    def decorator(func: F) -> F:
        span_name = name or func.__name__

        # Prefer the explicitly supplied tracer, else fall back to the global.
        tracer_instance = tracer if tracer is not None else get_tracer()
        if tracer_instance is None:
            raise RuntimeError(
                "No global tracer set. Call prela.init() first or provide a tracer parameter."
            )

        def open_span():
            # Hand each invocation its own attribute dict so spans never
            # share (or write back into) the dict given to the decorator.
            initial = dict(attributes) if attributes else None
            return tracer_instance.span(
                span_name, span_type=span_type, attributes=initial
            )

        def tag_function(span: Span) -> None:
            # Record which function produced the span.
            span.set_attribute("function.name", func.__name__)
            span.set_attribute("function.module", func.__module__)

        # inspect.iscoroutinefunction is the supported check;
        # asyncio.iscoroutinefunction is deprecated as of Python 3.14.
        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                with open_span() as span:
                    tag_function(span)
                    # Await inside the span so it covers the whole coroutine.
                    return await func(*args, **kwargs)

            return async_wrapper  # type: ignore

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            with open_span() as span:
                tag_function(span)
                return func(*args, **kwargs)

        return sync_wrapper  # type: ignore

    return decorator
prela/evals/__init__.py ADDED
@@ -0,0 +1,94 @@
1
+ """Evaluation framework for AI agents.
2
+
3
+ This module provides a comprehensive evaluation framework for testing AI agents:
4
+ - Test case definition (EvalInput, EvalExpected, EvalCase)
5
+ - Test suite management (EvalSuite)
6
+ - YAML-based configuration
7
+ - Assertion framework
8
+ - Result tracking
9
+ - n8n workflow evaluation
10
+
11
+ Example:
12
+ >>> from prela.evals import EvalSuite, EvalCase, EvalInput, EvalExpected
13
+ >>>
14
+ >>> # Define a test case
15
+ >>> case = EvalCase(
16
+ ... id="test_qa",
17
+ ... name="Basic QA test",
18
+ ... input=EvalInput(query="What is the capital of France?"),
19
+ ... expected=EvalExpected(contains=["Paris"]),
20
+ ... assertions=[
21
+ ... {"type": "contains", "value": "Paris"},
22
+ ... {"type": "semantic_similarity", "threshold": 0.8}
23
+ ... ]
24
+ ... )
25
+ >>>
26
+ >>> # Create a suite
27
+ >>> suite = EvalSuite(
28
+ ... name="Geography QA Suite",
29
+ ... description="Tests for geography knowledge",
30
+ ... cases=[case]
31
+ ... )
32
+ >>>
33
+ >>> # Save to YAML
34
+ >>> suite.to_yaml("geography_qa.yaml")
35
+ >>>
36
+ >>> # Load from YAML
37
+ >>> loaded_suite = EvalSuite.from_yaml("geography_qa.yaml")
38
+ """
39
+
40
+ from prela.evals.case import EvalCase, EvalExpected, EvalInput
41
+
42
+ # n8n evaluation framework
43
+ from prela.evals.n8n import (
44
+ N8nEvalCase,
45
+ N8nWorkflowEvalConfig,
46
+ N8nWorkflowEvalRunner,
47
+ eval_n8n_workflow,
48
+ )
49
+
50
+ # n8n assertions (convenience re-export)
51
+ from prela.evals.n8n.assertions import (
52
+ duration_under,
53
+ node_completed,
54
+ node_output,
55
+ tokens_under,
56
+ workflow_completed,
57
+ workflow_status,
58
+ )
59
+ from prela.evals.reporters import ConsoleReporter, JSONReporter, JUnitReporter
60
+ from prela.evals.runner import (
61
+ CaseResult,
62
+ EvalRunResult,
63
+ EvalRunner,
64
+ create_assertion,
65
+ )
66
+ from prela.evals.suite import EvalSuite
67
+
68
+ __all__ = [
69
+ # Core eval framework
70
+ "EvalCase",
71
+ "EvalExpected",
72
+ "EvalInput",
73
+ "EvalSuite",
74
+ "CaseResult",
75
+ "EvalRunResult",
76
+ "EvalRunner",
77
+ "create_assertion",
78
+ # Reporters
79
+ "ConsoleReporter",
80
+ "JSONReporter",
81
+ "JUnitReporter",
82
+ # n8n workflow evaluation
83
+ "N8nEvalCase",
84
+ "N8nWorkflowEvalConfig",
85
+ "N8nWorkflowEvalRunner",
86
+ "eval_n8n_workflow",
87
+ # n8n assertions (convenience)
88
+ "node_completed",
89
+ "node_output",
90
+ "duration_under",
91
+ "tokens_under",
92
+ "workflow_completed",
93
+ "workflow_status",
94
+ ]