prela 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. prela/__init__.py +394 -0
  2. prela/_version.py +3 -0
  3. prela/contrib/CLI.md +431 -0
  4. prela/contrib/README.md +118 -0
  5. prela/contrib/__init__.py +5 -0
  6. prela/contrib/cli.py +1063 -0
  7. prela/contrib/explorer.py +571 -0
  8. prela/core/__init__.py +64 -0
  9. prela/core/clock.py +98 -0
  10. prela/core/context.py +228 -0
  11. prela/core/replay.py +403 -0
  12. prela/core/sampler.py +178 -0
  13. prela/core/span.py +295 -0
  14. prela/core/tracer.py +498 -0
  15. prela/evals/__init__.py +94 -0
  16. prela/evals/assertions/README.md +484 -0
  17. prela/evals/assertions/__init__.py +78 -0
  18. prela/evals/assertions/base.py +90 -0
  19. prela/evals/assertions/multi_agent.py +625 -0
  20. prela/evals/assertions/semantic.py +223 -0
  21. prela/evals/assertions/structural.py +443 -0
  22. prela/evals/assertions/tool.py +380 -0
  23. prela/evals/case.py +370 -0
  24. prela/evals/n8n/__init__.py +69 -0
  25. prela/evals/n8n/assertions.py +450 -0
  26. prela/evals/n8n/runner.py +497 -0
  27. prela/evals/reporters/README.md +184 -0
  28. prela/evals/reporters/__init__.py +32 -0
  29. prela/evals/reporters/console.py +251 -0
  30. prela/evals/reporters/json.py +176 -0
  31. prela/evals/reporters/junit.py +278 -0
  32. prela/evals/runner.py +525 -0
  33. prela/evals/suite.py +316 -0
  34. prela/exporters/__init__.py +27 -0
  35. prela/exporters/base.py +189 -0
  36. prela/exporters/console.py +443 -0
  37. prela/exporters/file.py +322 -0
  38. prela/exporters/http.py +394 -0
  39. prela/exporters/multi.py +154 -0
  40. prela/exporters/otlp.py +388 -0
  41. prela/instrumentation/ANTHROPIC.md +297 -0
  42. prela/instrumentation/LANGCHAIN.md +480 -0
  43. prela/instrumentation/OPENAI.md +59 -0
  44. prela/instrumentation/__init__.py +49 -0
  45. prela/instrumentation/anthropic.py +1436 -0
  46. prela/instrumentation/auto.py +129 -0
  47. prela/instrumentation/base.py +436 -0
  48. prela/instrumentation/langchain.py +959 -0
  49. prela/instrumentation/llamaindex.py +719 -0
  50. prela/instrumentation/multi_agent/__init__.py +48 -0
  51. prela/instrumentation/multi_agent/autogen.py +357 -0
  52. prela/instrumentation/multi_agent/crewai.py +404 -0
  53. prela/instrumentation/multi_agent/langgraph.py +299 -0
  54. prela/instrumentation/multi_agent/models.py +203 -0
  55. prela/instrumentation/multi_agent/swarm.py +231 -0
  56. prela/instrumentation/n8n/__init__.py +68 -0
  57. prela/instrumentation/n8n/code_node.py +534 -0
  58. prela/instrumentation/n8n/models.py +336 -0
  59. prela/instrumentation/n8n/webhook.py +489 -0
  60. prela/instrumentation/openai.py +1198 -0
  61. prela/license.py +245 -0
  62. prela/replay/__init__.py +31 -0
  63. prela/replay/comparison.py +390 -0
  64. prela/replay/engine.py +1227 -0
  65. prela/replay/loader.py +231 -0
  66. prela/replay/result.py +196 -0
  67. prela-0.1.0.dist-info/METADATA +399 -0
  68. prela-0.1.0.dist-info/RECORD +71 -0
  69. prela-0.1.0.dist-info/WHEEL +4 -0
  70. prela-0.1.0.dist-info/entry_points.txt +2 -0
  71. prela-0.1.0.dist-info/licenses/LICENSE +190 -0
prela/replay/loader.py ADDED
@@ -0,0 +1,231 @@
1
+ """Trace loading utilities for replay engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from prela.core.span import Span
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class Trace:
    """Represents a complete trace with all spans.

    A trace is a collection of spans that form a complete execution tree.
    On construction the spans are indexed by ID and grouped under their
    parent IDs so the tree can be traversed in start-time order.

    Note:
        Only spans whose ``parent_span_id`` is ``None`` are treated as roots.
        A span whose ``parent_span_id`` names a span that is not part of this
        trace is still recorded in ``children`` under that ID, but it is not
        reachable from any root and is therefore not returned by
        ``walk_depth_first``.
    """

    def __init__(self, trace_id: str, spans: list[Span]) -> None:
        """Initialize a trace.

        Args:
            trace_id: Unique trace identifier
            spans: List of spans in this trace
        """
        self.trace_id = trace_id
        self.spans = spans
        self._build_tree()

    def _build_tree(self) -> None:
        """Build parent-child relationships between spans.

        Populates ``span_map`` (span ID -> span), ``children`` (parent span
        ID -> child spans) and ``root_spans`` (spans without a parent).
        """
        # Create span lookup
        self.span_map: dict[str, Span] = {s.span_id: s for s in self.spans}

        # Build children mapping
        self.children: dict[str, list[Span]] = {}
        self.root_spans: list[Span] = []

        for span in self.spans:
            if span.parent_span_id is None:
                self.root_spans.append(span)
            else:
                self.children.setdefault(span.parent_span_id, []).append(span)

        # Sort children by start time for deterministic execution order
        for children_list in self.children.values():
            children_list.sort(key=lambda s: s.started_at)

        self.root_spans.sort(key=lambda s: s.started_at)

    def get_children(self, span_id: str) -> list[Span]:
        """Get all child spans of a given span.

        Args:
            span_id: Parent span ID

        Returns:
            List of child spans (empty if no children)
        """
        return self.children.get(span_id, [])

    def walk_depth_first(self) -> list[Span]:
        """Walk the trace tree depth-first.

        The walk is pre-order: each span is emitted before its children, and
        siblings are emitted in ``started_at`` order. It uses an explicit
        stack instead of recursion so that very deep span chains do not hit
        the interpreter recursion limit.

        Returns:
            List of spans in depth-first execution order
        """
        ordered: list[Span] = []
        # Guards against malformed data (e.g. duplicate span IDs) that would
        # otherwise make the same span object reachable from itself.
        visited_ids: set[int] = set()

        # Push in reverse so the earliest-started span is popped first.
        pending = self.root_spans[::-1]
        while pending:
            span = pending.pop()
            if id(span) in visited_ids:
                continue
            visited_ids.add(id(span))
            ordered.append(span)
            pending.extend(reversed(self.get_children(span.span_id)))

        return ordered

    def has_replay_data(self) -> bool:
        """Check if trace has replay snapshots.

        Returns:
            True if at least one span has replay data
        """
        return any(s.replay_snapshot is not None for s in self.spans)

    def validate_replay_completeness(self) -> tuple[bool, list[str]]:
        """Validate that trace has complete replay data.

        Returns:
            Tuple of (is_complete, list of missing span names), where each
            entry is formatted as ``"<name> (<span_id>)"``.
        """
        missing = [
            f"{span.name} ({span.span_id})"
            for span in self.spans
            if span.replay_snapshot is None
        ]

        return len(missing) == 0, missing
104
+
105
+
106
class TraceLoader:
    """Loads traces from various sources for replay."""

    @staticmethod
    def from_file(file_path: str | Path) -> Trace:
        """Load trace from a JSON file.

        Args:
            file_path: Path to trace JSON file

        Returns:
            Loaded trace

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If file format is invalid (this includes
                ``json.JSONDecodeError``, which is a ``ValueError`` subclass)
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Trace file not found: {file_path}")

        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        return TraceLoader.from_dict(data)

    @staticmethod
    def from_dict(data: dict[str, Any] | list[dict[str, Any]]) -> Trace:
        """Load trace from dictionary.

        Three input shapes are accepted:

        * a trace dictionary with ``"trace_id"`` and ``"spans"`` keys,
        * a single span dictionary (has ``"span_id"`` and ``"trace_id"``),
        * a non-empty list of span dictionaries; the trace ID is taken from
          the first element.

        Args:
            data: Trace data dictionary (or list of span dictionaries)

        Returns:
            Loaded trace

        Raises:
            ValueError: If data format is invalid
        """
        # Handle both single span and trace array formats
        if isinstance(data, dict) and "trace_id" in data and "spans" in data:
            # Trace format with metadata
            trace_id = data["trace_id"]
            spans_data = data["spans"]
        elif isinstance(data, dict) and "span_id" in data:
            # Single span format. Reaching this branch with a "trace_id" key
            # means "spans" is absent; without "trace_id" the span cannot be
            # assigned to a trace, so report it as the documented ValueError.
            if "trace_id" not in data:
                raise ValueError("Span data is missing 'trace_id'")
            trace_id = data["trace_id"]
            spans_data = [data]
        elif isinstance(data, list):
            # Array of spans
            if not data:
                raise ValueError("Empty span list")
            first = data[0]
            if not isinstance(first, dict) or "trace_id" not in first:
                raise ValueError("Span data is missing 'trace_id'")
            trace_id = first["trace_id"]
            spans_data = data
        else:
            raise ValueError("Invalid trace data format")

        # Deserialize spans
        spans = [Span.from_dict(span_data) for span_data in spans_data]

        return Trace(trace_id, spans)

    @staticmethod
    def from_jsonl(file_path: str | Path) -> list[Trace]:
        """Load multiple traces from JSONL file.

        Each line should be a complete trace or span. A line that is an
        object with a ``"spans"`` list contributes all of those spans to its
        ``"trace_id"``; any other object is treated as a single span. Blank
        lines are skipped. Spans are grouped by trace ID, and traces are
        returned in the order their IDs first appear in the file.

        Args:
            file_path: Path to JSONL file

        Returns:
            List of traces

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If a line is not valid JSON or is not an object with
                a ``"trace_id"`` key
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"JSONL file not found: {file_path}")

        # Group spans by trace_id
        traces_data: dict[str, list[dict[str, Any]]] = {}

        with open(path, encoding="utf-8") as f:
            for line_number, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                record = json.loads(line)
                if not isinstance(record, dict) or "trace_id" not in record:
                    raise ValueError(
                        f"Line {line_number}: expected a JSON object with 'trace_id'"
                    )

                bucket = traces_data.setdefault(record["trace_id"], [])
                nested_spans = record.get("spans")
                if isinstance(nested_spans, list):
                    # Complete trace on one line: unpack its spans.
                    bucket.extend(nested_spans)
                else:
                    bucket.append(record)

        # Build traces
        traces = []
        for trace_id, spans_data in traces_data.items():
            spans = [Span.from_dict(span_data) for span_data in spans_data]
            traces.append(Trace(trace_id, spans))

        return traces

    @staticmethod
    def from_span_list(spans: list[Span]) -> Trace:
        """Create trace from list of spans.

        Args:
            spans: List of spans

        Returns:
            Trace

        Raises:
            ValueError: If the list is empty or spans don't share same trace_id
        """
        if not spans:
            raise ValueError("Empty span list")

        trace_id = spans[0].trace_id
        if not all(s.trace_id == trace_id for s in spans):
            raise ValueError("All spans must have the same trace_id")

        return Trace(trace_id, spans)
prela/replay/result.py ADDED
@@ -0,0 +1,196 @@
1
+ """Result data structures for replay execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+
9
@dataclass
class ReplayedSpan:
    """Outcome of replaying one span.

    Records which original span was replayed, the input and output of the
    replay, whether the span was modified, and basic metrics.
    """

    original_span_id: str
    span_type: str
    name: str
    input: Any
    output: Any
    was_modified: bool = False
    modification_details: str | None = None
    duration_ms: float = 0.0
    tokens_used: int = 0
    cost_usd: float = 0.0
    error: str | None = None
    retry_count: int = 0  # Number of retry attempts for API calls

    def to_dict(self) -> dict[str, Any]:
        """Return the span result as a plain dictionary.

        Values are referenced, not copied, and keys follow field order.
        """
        exported = (
            "original_span_id",
            "span_type",
            "name",
            "input",
            "output",
            "was_modified",
            "modification_details",
            "duration_ms",
            "tokens_used",
            "cost_usd",
            "error",
            "retry_count",
        )
        return {key: getattr(self, key) for key in exported}
45
+
46
+
47
@dataclass
class ReplayResult:
    """Aggregate outcome of replaying a whole trace.

    Holds the replayed spans together with totals, the final output, any
    trace-level errors, and the modifications that were applied.
    """

    trace_id: str
    spans: list[ReplayedSpan] = field(default_factory=list)
    total_duration_ms: float = 0.0
    total_tokens: int = 0
    total_cost_usd: float = 0.0
    final_output: Any = None
    errors: list[str] = field(default_factory=list)
    modifications_applied: dict[str, Any] = field(default_factory=dict)

    @property
    def success(self) -> bool:
        """True when there are no trace-level errors and no span has an error."""
        if len(self.errors) > 0:
            return False
        return not any(span.error is not None for span in self.spans)

    @property
    def modified_span_count(self) -> int:
        """Number of spans flagged as modified."""
        return len([span for span in self.spans if span.was_modified])

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        Spans are serialized through their own ``to_dict``; the derived
        ``success`` and ``modified_span_count`` values are included.
        """
        serialized_spans = [span.to_dict() for span in self.spans]
        payload: dict[str, Any] = {
            "trace_id": self.trace_id,
            "spans": serialized_spans,
        }
        # Remaining keys map one-to-one onto attributes/properties.
        for key in (
            "total_duration_ms",
            "total_tokens",
            "total_cost_usd",
            "final_output",
            "errors",
            "modifications_applied",
            "success",
            "modified_span_count",
        ):
            payload[key] = getattr(self, key)
        return payload
87
+
88
+
89
@dataclass
class SpanDifference:
    """One difference between an original and a modified span execution.

    Identifies the span by name and type, names the field that differs, and
    carries both values plus optional similarity information.
    """

    span_name: str
    span_type: str
    field: str
    original_value: Any
    modified_value: Any
    semantic_similarity: float | None = None
    exact_match: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the difference as a plain dictionary, keys in field order."""
        exported = (
            "span_name",
            "span_type",
            "field",
            "original_value",
            "modified_value",
            "semantic_similarity",
            "exact_match",
        )
        return {key: getattr(self, key) for key in exported}
115
+
116
+
117
@dataclass
class ReplayComparison:
    """Comparison between two replay results.

    Highlights differences and provides summary statistics.
    """

    original: ReplayResult
    modified: ReplayResult
    differences: list[SpanDifference] = field(default_factory=list)
    summary: str = ""
    semantic_similarity_available: bool = False  # Whether sentence-transformers is available
    semantic_similarity_model: str | None = None  # Model used for similarity (if available)

    @property
    def identical_spans(self) -> int:
        """Count spans with identical outputs.

        Computed as the number of original spans minus the number of
        recorded differences, clamped at zero. Differences are recorded per
        field, so a span that differs in several fields is subtracted once
        per field; the clamp keeps the count (and the percentage derived
        from it) from going negative in that case.
        """
        return max(len(self.original.spans) - len(self.differences), 0)

    @property
    def changed_spans(self) -> int:
        """Count spans with different outputs (distinct span names among differences)."""
        return len({d.span_name for d in self.differences})

    @property
    def total_cost_delta(self) -> float:
        """Calculate cost difference (modified minus original)."""
        return self.modified.total_cost_usd - self.original.total_cost_usd

    @property
    def total_tokens_delta(self) -> int:
        """Calculate token usage difference (modified minus original)."""
        return self.modified.total_tokens - self.original.total_tokens

    def generate_summary(self) -> str:
        """Generate human-readable summary of differences.

        The text is also stored on ``self.summary``.

        Returns:
            Multi-line summary with span counts, cost and token deltas, and
            up to five individual differences.
        """
        total_spans = len(self.original.spans)
        changed = self.changed_spans
        identical = self.identical_spans

        # Calculate percentages (handle zero total_spans)
        identical_pct = (identical / total_spans * 100) if total_spans > 0 else 0.0
        changed_pct = (changed / total_spans * 100) if total_spans > 0 else 0.0

        cost_delta = self.total_cost_delta
        tokens_delta = self.total_tokens_delta

        lines = [
            "Replay Comparison Summary",
            "=" * 50,
            f"Total Spans: {total_spans}",
            f"Identical: {identical} ({identical_pct:.1f}%)",
            f"Changed: {changed} ({changed_pct:.1f}%)",
            "",
            f"Cost: ${self.original.total_cost_usd:.4f} → ${self.modified.total_cost_usd:.4f} "
            f"({'+' if cost_delta > 0 else ''}{cost_delta:.4f})",
            f"Tokens: {self.original.total_tokens} → {self.modified.total_tokens} "
            f"({'+' if tokens_delta > 0 else ''}{tokens_delta})",
        ]

        if self.differences:
            lines.append("")
            lines.append("Key Differences:")
            for diff in self.differences[:5]:  # Show top 5
                lines.append(f" • {diff.span_name} ({diff.field})")
                if diff.semantic_similarity is not None:
                    lines.append(f" Similarity: {diff.semantic_similarity:.2%}")

        self.summary = "\n".join(lines)
        return self.summary

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation.

        If ``summary`` is empty it is generated (and cached on the instance)
        as part of this call.
        """
        return {
            "original": self.original.to_dict(),
            "modified": self.modified.to_dict(),
            "differences": [d.to_dict() for d in self.differences],
            "summary": self.summary or self.generate_summary(),
            "identical_spans": self.identical_spans,
            "changed_spans": self.changed_spans,
            "total_cost_delta": self.total_cost_delta,
            "total_tokens_delta": self.total_tokens_delta,
            "semantic_similarity_available": self.semantic_similarity_available,
            "semantic_similarity_model": self.semantic_similarity_model,
        }