prela 0.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- prela/__init__.py +394 -0
- prela/_version.py +3 -0
- prela/contrib/CLI.md +431 -0
- prela/contrib/README.md +118 -0
- prela/contrib/__init__.py +5 -0
- prela/contrib/cli.py +1063 -0
- prela/contrib/explorer.py +571 -0
- prela/core/__init__.py +64 -0
- prela/core/clock.py +98 -0
- prela/core/context.py +228 -0
- prela/core/replay.py +403 -0
- prela/core/sampler.py +178 -0
- prela/core/span.py +295 -0
- prela/core/tracer.py +498 -0
- prela/evals/__init__.py +94 -0
- prela/evals/assertions/README.md +484 -0
- prela/evals/assertions/__init__.py +78 -0
- prela/evals/assertions/base.py +90 -0
- prela/evals/assertions/multi_agent.py +625 -0
- prela/evals/assertions/semantic.py +223 -0
- prela/evals/assertions/structural.py +443 -0
- prela/evals/assertions/tool.py +380 -0
- prela/evals/case.py +370 -0
- prela/evals/n8n/__init__.py +69 -0
- prela/evals/n8n/assertions.py +450 -0
- prela/evals/n8n/runner.py +497 -0
- prela/evals/reporters/README.md +184 -0
- prela/evals/reporters/__init__.py +32 -0
- prela/evals/reporters/console.py +251 -0
- prela/evals/reporters/json.py +176 -0
- prela/evals/reporters/junit.py +278 -0
- prela/evals/runner.py +525 -0
- prela/evals/suite.py +316 -0
- prela/exporters/__init__.py +27 -0
- prela/exporters/base.py +189 -0
- prela/exporters/console.py +443 -0
- prela/exporters/file.py +322 -0
- prela/exporters/http.py +394 -0
- prela/exporters/multi.py +154 -0
- prela/exporters/otlp.py +388 -0
- prela/instrumentation/ANTHROPIC.md +297 -0
- prela/instrumentation/LANGCHAIN.md +480 -0
- prela/instrumentation/OPENAI.md +59 -0
- prela/instrumentation/__init__.py +49 -0
- prela/instrumentation/anthropic.py +1436 -0
- prela/instrumentation/auto.py +129 -0
- prela/instrumentation/base.py +436 -0
- prela/instrumentation/langchain.py +959 -0
- prela/instrumentation/llamaindex.py +719 -0
- prela/instrumentation/multi_agent/__init__.py +48 -0
- prela/instrumentation/multi_agent/autogen.py +357 -0
- prela/instrumentation/multi_agent/crewai.py +404 -0
- prela/instrumentation/multi_agent/langgraph.py +299 -0
- prela/instrumentation/multi_agent/models.py +203 -0
- prela/instrumentation/multi_agent/swarm.py +231 -0
- prela/instrumentation/n8n/__init__.py +68 -0
- prela/instrumentation/n8n/code_node.py +534 -0
- prela/instrumentation/n8n/models.py +336 -0
- prela/instrumentation/n8n/webhook.py +489 -0
- prela/instrumentation/openai.py +1198 -0
- prela/license.py +245 -0
- prela/replay/__init__.py +31 -0
- prela/replay/comparison.py +390 -0
- prela/replay/engine.py +1227 -0
- prela/replay/loader.py +231 -0
- prela/replay/result.py +196 -0
- prela-0.1.0.dist-info/METADATA +399 -0
- prela-0.1.0.dist-info/RECORD +71 -0
- prela-0.1.0.dist-info/WHEEL +4 -0
- prela-0.1.0.dist-info/entry_points.txt +2 -0
- prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/replay/loader.py
ADDED
@@ -0,0 +1,231 @@

"""Trace loading utilities for replay engine."""

from __future__ import annotations

import json
import logging
from pathlib import Path
from typing import Any

from prela.core.span import Span

logger = logging.getLogger(__name__)


class Trace:
    """Represents a complete trace with all spans.

    A trace is a collection of spans that form a complete execution tree.
    """

    def __init__(self, trace_id: str, spans: list[Span]) -> None:
        """Initialize a trace.

        Args:
            trace_id: Unique trace identifier
            spans: List of spans in this trace
        """
        self.trace_id = trace_id
        self.spans = spans
        self._build_tree()

    def _build_tree(self) -> None:
        """Build parent-child relationships between spans."""
        # Create span lookup
        self.span_map: dict[str, Span] = {s.span_id: s for s in self.spans}

        # Build children mapping
        self.children: dict[str, list[Span]] = {}
        self.root_spans: list[Span] = []

        for span in self.spans:
            if span.parent_span_id is None:
                self.root_spans.append(span)
            else:
                if span.parent_span_id not in self.children:
                    self.children[span.parent_span_id] = []
                self.children[span.parent_span_id].append(span)

        # Sort children by start time for deterministic execution order
        for children_list in self.children.values():
            children_list.sort(key=lambda s: s.started_at)

        self.root_spans.sort(key=lambda s: s.started_at)

    def get_children(self, span_id: str) -> list[Span]:
        """Get all child spans of a given span.

        Args:
            span_id: Parent span ID

        Returns:
            List of child spans (empty if no children)
        """
        return self.children.get(span_id, [])

    def walk_depth_first(self) -> list[Span]:
        """Walk the trace tree depth-first.

        Returns:
            List of spans in depth-first execution order
        """
        result = []

        def visit(span: Span) -> None:
            result.append(span)
            for child in self.get_children(span.span_id):
                visit(child)

        for root in self.root_spans:
            visit(root)

        return result

    def has_replay_data(self) -> bool:
        """Check if trace has replay snapshots.

        Returns:
            True if at least one span has replay data
        """
        return any(s.replay_snapshot is not None for s in self.spans)

    def validate_replay_completeness(self) -> tuple[bool, list[str]]:
        """Validate that trace has complete replay data.

        Returns:
            Tuple of (is_complete, list of missing span names)
        """
        missing = []
        for span in self.spans:
            if span.replay_snapshot is None:
                missing.append(f"{span.name} ({span.span_id})")

        return len(missing) == 0, missing


class TraceLoader:
    """Loads traces from various sources for replay."""

    @staticmethod
    def from_file(file_path: str | Path) -> Trace:
        """Load trace from a JSON file.

        Args:
            file_path: Path to trace JSON file

        Returns:
            Loaded trace

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If file format is invalid
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Trace file not found: {file_path}")

        with open(path) as f:
            data = json.load(f)

        return TraceLoader.from_dict(data)

    @staticmethod
    def from_dict(data: dict[str, Any]) -> Trace:
        """Load trace from dictionary.

        Args:
            data: Trace data dictionary

        Returns:
            Loaded trace

        Raises:
            ValueError: If data format is invalid
        """
        # Handle both single span and trace array formats
        if isinstance(data, dict) and "trace_id" in data and "spans" in data:
            # Trace format with metadata
            trace_id = data["trace_id"]
            spans_data = data["spans"]
        elif isinstance(data, dict) and "span_id" in data:
            # Single span format
            trace_id = data["trace_id"]
            spans_data = [data]
        elif isinstance(data, list):
            # Array of spans
            if not data:
                raise ValueError("Empty span list")
            trace_id = data[0]["trace_id"]
            spans_data = data
        else:
            raise ValueError("Invalid trace data format")

        # Deserialize spans
        spans = [Span.from_dict(span_data) for span_data in spans_data]

        return Trace(trace_id, spans)

    @staticmethod
    def from_jsonl(file_path: str | Path) -> list[Trace]:
        """Load multiple traces from JSONL file.

        Each line should be a complete trace or span.

        Args:
            file_path: Path to JSONL file

        Returns:
            List of traces

        Raises:
            FileNotFoundError: If file doesn't exist
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"JSONL file not found: {file_path}")

        # Group spans by trace_id
        traces_data: dict[str, list[dict[str, Any]]] = {}

        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                span_data = json.loads(line)
                trace_id = span_data["trace_id"]

                if trace_id not in traces_data:
                    traces_data[trace_id] = []
                traces_data[trace_id].append(span_data)

        # Build traces
        traces = []
        for trace_id, spans_data in traces_data.items():
            spans = [Span.from_dict(span_data) for span_data in spans_data]
            traces.append(Trace(trace_id, spans))

        return traces

    @staticmethod
    def from_span_list(spans: list[Span]) -> Trace:
        """Create trace from list of spans.

        Args:
            spans: List of spans

        Returns:
            Trace

        Raises:
            ValueError: If spans don't share same trace_id
        """
        if not spans:
            raise ValueError("Empty span list")

        trace_id = spans[0].trace_id
        if not all(s.trace_id == trace_id for s in spans):
            raise ValueError("All spans must have the same trace_id")

        return Trace(trace_id, spans)
prela/replay/result.py
ADDED
@@ -0,0 +1,196 @@

"""Result data structures for replay execution."""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any


@dataclass
class ReplayedSpan:
    """Result of replaying a single span.

    Tracks what was executed, whether it was modified, and the output.
    """

    original_span_id: str
    span_type: str
    name: str
    input: Any
    output: Any
    was_modified: bool = False
    modification_details: str | None = None
    duration_ms: float = 0.0
    tokens_used: int = 0
    cost_usd: float = 0.0
    error: str | None = None
    retry_count: int = 0  # Number of retry attempts for API calls

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "original_span_id": self.original_span_id,
            "span_type": self.span_type,
            "name": self.name,
            "input": self.input,
            "output": self.output,
            "was_modified": self.was_modified,
            "modification_details": self.modification_details,
            "duration_ms": self.duration_ms,
            "tokens_used": self.tokens_used,
            "cost_usd": self.cost_usd,
            "error": self.error,
            "retry_count": self.retry_count,
        }


@dataclass
class ReplayResult:
    """Complete result of replaying a trace.

    Contains all replayed spans, aggregated metrics, and final output.
    """

    trace_id: str
    spans: list[ReplayedSpan] = field(default_factory=list)
    total_duration_ms: float = 0.0
    total_tokens: int = 0
    total_cost_usd: float = 0.0
    final_output: Any = None
    errors: list[str] = field(default_factory=list)
    modifications_applied: dict[str, Any] = field(default_factory=dict)

    @property
    def success(self) -> bool:
        """Check if replay completed without errors."""
        return len(self.errors) == 0 and all(s.error is None for s in self.spans)

    @property
    def modified_span_count(self) -> int:
        """Count how many spans were modified."""
        return sum(1 for s in self.spans if s.was_modified)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "trace_id": self.trace_id,
            "spans": [s.to_dict() for s in self.spans],
            "total_duration_ms": self.total_duration_ms,
            "total_tokens": self.total_tokens,
            "total_cost_usd": self.total_cost_usd,
            "final_output": self.final_output,
            "errors": self.errors,
            "modifications_applied": self.modifications_applied,
            "success": self.success,
            "modified_span_count": self.modified_span_count,
        }


@dataclass
class SpanDifference:
    """Difference between two span executions.

    Captures what changed between original and modified replay.
    """

    span_name: str
    span_type: str
    field: str
    original_value: Any
    modified_value: Any
    semantic_similarity: float | None = None
    exact_match: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "span_name": self.span_name,
            "span_type": self.span_type,
            "field": self.field,
            "original_value": self.original_value,
            "modified_value": self.modified_value,
            "semantic_similarity": self.semantic_similarity,
            "exact_match": self.exact_match,
        }


@dataclass
class ReplayComparison:
    """Comparison between two replay results.

    Highlights differences and provides summary statistics.
    """

    original: ReplayResult
    modified: ReplayResult
    differences: list[SpanDifference] = field(default_factory=list)
    summary: str = ""
    semantic_similarity_available: bool = False  # Whether sentence-transformers is available
    semantic_similarity_model: str | None = None  # Model used for similarity (if available)

    @property
    def identical_spans(self) -> int:
        """Count spans with identical outputs."""
        return len(self.original.spans) - len(self.differences)

    @property
    def changed_spans(self) -> int:
        """Count spans with different outputs."""
        return len(set(d.span_name for d in self.differences))

    @property
    def total_cost_delta(self) -> float:
        """Calculate cost difference."""
        return self.modified.total_cost_usd - self.original.total_cost_usd

    @property
    def total_tokens_delta(self) -> int:
        """Calculate token usage difference."""
        return self.modified.total_tokens - self.original.total_tokens

    def generate_summary(self) -> str:
        """Generate human-readable summary of differences."""
        total_spans = len(self.original.spans)
        changed = self.changed_spans
        identical = self.identical_spans

        # Calculate percentages (handle zero total_spans)
        identical_pct = (identical / total_spans * 100) if total_spans > 0 else 0.0
        changed_pct = (changed / total_spans * 100) if total_spans > 0 else 0.0

        lines = [
            f"Replay Comparison Summary",
            f"=" * 50,
            f"Total Spans: {total_spans}",
            f"Identical: {identical} ({identical_pct:.1f}%)",
            f"Changed: {changed} ({changed_pct:.1f}%)",
            f"",
            f"Cost: ${self.original.total_cost_usd:.4f} → ${self.modified.total_cost_usd:.4f} "
            f"({'+' if self.total_cost_delta > 0 else ''}{self.total_cost_delta:.4f})",
            f"Tokens: {self.original.total_tokens} → {self.modified.total_tokens} "
            f"({'+' if self.total_tokens_delta > 0 else ''}{self.total_tokens_delta})",
        ]

        if self.differences:
            lines.append("")
            lines.append("Key Differences:")
            for diff in self.differences[:5]:  # Show top 5
                lines.append(f"  • {diff.span_name} ({diff.field})")
                if diff.semantic_similarity is not None:
                    lines.append(f"    Similarity: {diff.semantic_similarity:.2%}")

        self.summary = "\n".join(lines)
        return self.summary

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "original": self.original.to_dict(),
            "modified": self.modified.to_dict(),
            "differences": [d.to_dict() for d in self.differences],
            "summary": self.summary or self.generate_summary(),
            "identical_spans": self.identical_spans,
            "changed_spans": self.changed_spans,
            "total_cost_delta": self.total_cost_delta,
            "total_tokens_delta": self.total_tokens_delta,
        }