levelapp-0.1.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. levelapp/__init__.py +0 -0
  2. levelapp/aspects/__init__.py +8 -0
  3. levelapp/aspects/loader.py +253 -0
  4. levelapp/aspects/logger.py +59 -0
  5. levelapp/aspects/monitor.py +617 -0
  6. levelapp/aspects/sanitizer.py +168 -0
  7. levelapp/clients/__init__.py +122 -0
  8. levelapp/clients/anthropic.py +112 -0
  9. levelapp/clients/gemini.py +130 -0
  10. levelapp/clients/groq.py +101 -0
  11. levelapp/clients/huggingface.py +162 -0
  12. levelapp/clients/ionos.py +126 -0
  13. levelapp/clients/mistral.py +106 -0
  14. levelapp/clients/openai.py +116 -0
  15. levelapp/comparator/__init__.py +5 -0
  16. levelapp/comparator/comparator.py +232 -0
  17. levelapp/comparator/extractor.py +108 -0
  18. levelapp/comparator/schemas.py +61 -0
  19. levelapp/comparator/scorer.py +269 -0
  20. levelapp/comparator/utils.py +136 -0
  21. levelapp/config/__init__.py +5 -0
  22. levelapp/config/endpoint.py +199 -0
  23. levelapp/config/prompts.py +57 -0
  24. levelapp/core/__init__.py +0 -0
  25. levelapp/core/base.py +386 -0
  26. levelapp/core/schemas.py +24 -0
  27. levelapp/core/session.py +336 -0
  28. levelapp/endpoint/__init__.py +0 -0
  29. levelapp/endpoint/client.py +188 -0
  30. levelapp/endpoint/client_test.py +41 -0
  31. levelapp/endpoint/manager.py +114 -0
  32. levelapp/endpoint/parsers.py +119 -0
  33. levelapp/endpoint/schemas.py +38 -0
  34. levelapp/endpoint/tester.py +52 -0
  35. levelapp/evaluator/__init__.py +3 -0
  36. levelapp/evaluator/evaluator.py +307 -0
  37. levelapp/metrics/__init__.py +63 -0
  38. levelapp/metrics/embedding.py +56 -0
  39. levelapp/metrics/embeddings/__init__.py +0 -0
  40. levelapp/metrics/embeddings/sentence_transformer.py +30 -0
  41. levelapp/metrics/embeddings/torch_based.py +56 -0
  42. levelapp/metrics/exact.py +182 -0
  43. levelapp/metrics/fuzzy.py +80 -0
  44. levelapp/metrics/token.py +103 -0
  45. levelapp/plugins/__init__.py +0 -0
  46. levelapp/repository/__init__.py +3 -0
  47. levelapp/repository/filesystem.py +203 -0
  48. levelapp/repository/firestore.py +291 -0
  49. levelapp/simulator/__init__.py +3 -0
  50. levelapp/simulator/schemas.py +116 -0
  51. levelapp/simulator/simulator.py +531 -0
  52. levelapp/simulator/utils.py +134 -0
  53. levelapp/visualization/__init__.py +7 -0
  54. levelapp/visualization/charts.py +358 -0
  55. levelapp/visualization/dashboard.py +240 -0
  56. levelapp/visualization/exporter.py +167 -0
  57. levelapp/visualization/templates/base.html +158 -0
  58. levelapp/visualization/templates/comparator_dashboard.html +57 -0
  59. levelapp/visualization/templates/simulator_dashboard.html +111 -0
  60. levelapp/workflow/__init__.py +6 -0
  61. levelapp/workflow/base.py +192 -0
  62. levelapp/workflow/config.py +96 -0
  63. levelapp/workflow/context.py +64 -0
  64. levelapp/workflow/factory.py +42 -0
  65. levelapp/workflow/registration.py +6 -0
  66. levelapp/workflow/runtime.py +19 -0
  67. levelapp-0.1.15.dist-info/METADATA +571 -0
  68. levelapp-0.1.15.dist-info/RECORD +70 -0
  69. levelapp-0.1.15.dist-info/WHEEL +4 -0
  70. levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0
levelapp/visualization/charts.py
@@ -0,0 +1,358 @@
+"""levelapp/visualization/charts.py: Chart generation for evaluation results."""
+
+import plotly.graph_objects as go
+import plotly.express as px
+from typing import Dict, Any
+from collections import defaultdict
+
+from levelapp.simulator.schemas import SimulationResults
+from levelapp.aspects import logger
+
+
+class ChartGenerator:
+    """Generate interactive charts for evaluation results."""
+
+    def __init__(self, theme: str = "plotly_white"):
+        """
+        Initialize ChartGenerator with a theme.
+
+        Args:
+            theme: Plotly theme name (plotly, plotly_white, plotly_dark, ggplot2, seaborn, etc.)
+        """
+        self.theme = theme
+        self._color_palette = px.colors.qualitative.Plotly
+
+    def create_score_trend(
+        self, results: SimulationResults, metric: str = "average"
+    ) -> go.Figure:
+        """
+        Create a line chart showing score trends across scripts/attempts.
+
+        Args:
+            results: SimulationResults object
+            metric: Which metric to plot (default: "average")
+
+        Returns:
+            Plotly Figure object
+        """
+        logger.info(f"[ChartGenerator] Creating score trend chart for metric: {metric}")
+
+        fig = go.Figure()
+
+        # Extract scores by provider
+        provider_scores = defaultdict(list)
+        script_ids = []
+
+        if results.script_results:
+            for idx, script_result in enumerate(results.script_results):
+                script_id = script_result.script_id
+                script_ids.append(script_id)
+
+                avg_scores = script_result.average_scores
+                for provider, score in avg_scores.items():
+                    if (
+                        provider != "processing_time"
+                        and provider != "guardrail"
+                        and provider != "metadata"
+                    ):
+                        provider_scores[provider].append(score)
+
+        # Create line for each provider
+        for idx, (provider, scores) in enumerate(provider_scores.items()):
+            fig.add_trace(
+                go.Scatter(
+                    x=script_ids[: len(scores)],
+                    y=scores,
+                    mode="lines+markers",
+                    name=provider.upper(),
+                    line=dict(
+                        width=2,
+                        color=self._color_palette[idx % len(self._color_palette)],
+                    ),
+                    marker=dict(size=8),
+                )
+            )
+
+        fig.update_layout(
+            title="Evaluation Score Trends Across Scripts",
+            xaxis_title="Script ID",
+            yaxis_title="Score",
+            template=self.theme,
+            hovermode="x unified",
+            legend=dict(
+                orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
+            ),
+        )
+
+        return fig
+
+    def create_provider_comparison(self, results: SimulationResults) -> go.Figure:
+        """
+        Create a bar chart comparing average scores across providers.
+
+        Args:
+            results: SimulationResults object
+
+        Returns:
+            Plotly Figure object
+        """
+        logger.info("[ChartGenerator] Creating provider comparison chart")
+
+        providers = []
+        scores = []
+
+        if results.average_scores:
+            for provider, score in results.average_scores.items():
+                if provider not in ["processing_time", "guardrail", "metadata"]:
+                    providers.append(provider.upper())
+                    scores.append(score)
+
+        fig = go.Figure(
+            data=[
+                go.Bar(
+                    x=providers,
+                    y=scores,
+                    marker_color=self._color_palette[: len(providers)],
+                    text=[f"{s:.3f}" for s in scores],
+                    textposition="auto",
+                )
+            ]
+        )
+
+        fig.update_layout(
+            title="Average Scores by Provider",
+            xaxis_title="Provider",
+            yaxis_title="Average Score",
+            template=self.theme,
+            yaxis=dict(range=[0, 1]),
+        )
+
+        return fig
+
+    def create_score_distribution(
+        self, results: SimulationResults, provider: str
+    ) -> go.Figure:
+        """
+        Create a histogram and box plot showing score distribution for a provider.
+
+        Args:
+            results: SimulationResults object
+            provider: Provider name to analyze
+
+        Returns:
+            Plotly Figure object with subplots
+        """
+        logger.info(
+            f"[ChartGenerator] Creating score distribution for provider: {provider}"
+        )
+
+        from plotly.subplots import make_subplots
+
+        # Collect all scores for the provider
+        scores = []
+        if results.interaction_results:
+            for script_result in results.interaction_results:
+                avg_scores = script_result.get("average_scores", {})
+                if provider in avg_scores:
+                    scores.append(avg_scores[provider])
+
+        # Create subplots
+        fig = make_subplots(
+            rows=2,
+            cols=1,
+            subplot_titles=("Score Distribution", "Box Plot"),
+            vertical_spacing=0.15,
+        )
+
+        # Histogram
+        fig.add_trace(
+            go.Histogram(
+                x=scores,
+                nbinsx=20,
+                name="Distribution",
+                marker_color=self._color_palette[0],
+            ),
+            row=1,
+            col=1,
+        )
+
+        # Box plot
+        fig.add_trace(
+            go.Box(
+                x=scores,
+                name=provider.upper(),
+                marker_color=self._color_palette[1],
+                boxmean="sd",
+            ),
+            row=2,
+            col=1,
+        )
+
+        fig.update_layout(
+            title=f"Score Distribution for {provider.upper()}",
+            template=self.theme,
+            showlegend=False,
+            height=600,
+        )
+
+        fig.update_xaxes(title_text="Score", row=2, col=1)
+        fig.update_yaxes(title_text="Frequency", row=1, col=1)
+
+        return fig
+
+    def create_metadata_heatmap(self, comparator_results: Dict[str, Any]) -> go.Figure:
+        """
+        Create a heatmap showing field-level accuracy from comparator results.
+
+        Args:
+            comparator_results: Dictionary of comparator evaluation results
+
+        Returns:
+            Plotly Figure object
+        """
+        logger.info("[ChartGenerator] Creating metadata accuracy heatmap")
+
+        fields = []
+        scores = []
+
+        for idx, result in comparator_results.items():
+            field_name = result.get("field_name", f"Field {idx}")
+            set_scores = result.get("set_scores")
+
+            if set_scores is not None:
+                score = set_scores[0] if isinstance(set_scores, list) else set_scores
+                fields.append(field_name)
+                scores.append(float(score) if score is not None else 0.0)
+
+        fig = go.Figure(
+            data=go.Heatmap(
+                z=[scores],
+                x=fields,
+                y=["Accuracy"],
+                colorscale="RdYlGn",
+                text=[[f"{s:.2f}" for s in scores]],
+                texttemplate="%{text}",
+                textfont={"size": 10},
+                colorbar=dict(title="Score"),
+            )
+        )
+
+        fig.update_layout(
+            title="Metadata Field Accuracy Heatmap",
+            xaxis_title="Field Name",
+            template=self.theme,
+            height=300,
+        )
+
+        return fig
+
+    def create_interaction_timeline(self, results: SimulationResults) -> go.Figure:
+        """
+        Create a timeline visualization of interaction performance.
+
+        Args:
+            results: SimulationResults object
+
+        Returns:
+            Plotly Figure object
+        """
+        logger.info("[ChartGenerator] Creating interaction timeline")
+
+        fig = go.Figure()
+
+        if results.interaction_results:
+            for script_idx, script_result in enumerate(results.interaction_results):
+                script_id = script_result.get("script_id", f"Script {script_idx + 1}")
+                attempts = script_result.get("attempts", [])
+
+                for attempt in attempts:
+                    attempt_num = attempt.get("attempt", 1)
+                    duration = attempt.get("total_duration", 0)
+
+                    fig.add_trace(
+                        go.Bar(
+                            x=[duration],
+                            y=[f"{script_id} - Attempt {attempt_num}"],
+                            orientation="h",
+                            name=script_id,
+                            showlegend=False,
+                            marker_color=self._color_palette[
+                                script_idx % len(self._color_palette)
+                            ],
+                        )
+                    )
+
+        fig.update_layout(
+            title="Interaction Processing Timeline",
+            xaxis_title="Duration (seconds)",
+            yaxis_title="Script - Attempt",
+            template=self.theme,
+            height=max(400, len(results.interaction_results or []) * 60),
+        )
+
+        return fig
+
+    def create_summary_metrics(self, results: SimulationResults) -> go.Figure:
+        """
+        Create a summary metrics card visualization.
+
+        Args:
+            results: SimulationResults object
+
+        Returns:
+            Plotly Figure object
+        """
+        logger.info("[ChartGenerator] Creating summary metrics visualization")
+
+        from plotly.subplots import make_subplots
+
+        # Calculate summary metrics
+        total_scripts = len(results.interaction_results or [])
+        avg_score = (
+            results.average_scores.get("openai", 0) if results.average_scores else 0
+        )
+        total_time = results.elapsed_time
+
+        fig = make_subplots(
+            rows=1,
+            cols=3,
+            subplot_titles=("Total Scripts", "Avg Score", "Total Time (s)"),
+            specs=[
+                [{"type": "indicator"}, {"type": "indicator"}, {"type": "indicator"}]
+            ],
+        )
+
+        fig.add_trace(
+            go.Indicator(
+                mode="number",
+                value=total_scripts,
+                number={"font": {"size": 40}},
+            ),
+            row=1,
+            col=1,
+        )
+
+        fig.add_trace(
+            go.Indicator(
+                mode="number+delta",
+                value=avg_score,
+                number={"font": {"size": 40}},
+                delta={"reference": 0.8, "relative": False},
+            ),
+            row=1,
+            col=2,
+        )
+
+        fig.add_trace(
+            go.Indicator(
+                mode="number",
+                value=total_time,
+                number={"font": {"size": 40}, "suffix": "s"},
+            ),
+            row=1,
+            col=3,
+        )
+
+        fig.update_layout(template=self.theme, height=200, showlegend=False)
+
+        return fig
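Rough usage sketch for the chart module above (illustrative only, not part of the package diff; it assumes a populated SimulationResults instance named `results` produced elsewhere by the simulator, and uses Plotly's standard write_html export):

    from levelapp.visualization.charts import ChartGenerator

    # Build a single chart and save it as a standalone HTML file.
    charts = ChartGenerator(theme="plotly_white")
    fig = charts.create_provider_comparison(results)
    fig.write_html("provider_comparison.html", include_plotlyjs="cdn")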
levelapp/visualization/dashboard.py
@@ -0,0 +1,240 @@
+"""levelapp/visualization/dashboard.py: Dashboard generation for evaluation results."""
+
+from pathlib import Path
+from typing import Dict, Any
+from datetime import datetime
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+from levelapp.simulator.schemas import SimulationResults
+from levelapp.visualization.charts import ChartGenerator
+from levelapp.aspects import logger
+
+
+class DashboardGenerator:
+    """Generate comprehensive HTML dashboards for evaluation results."""
+
+    def __init__(self, template_dir: str | None = None):
+        """
+        Initialize DashboardGenerator.
+
+        Args:
+            template_dir: Optional custom template directory path
+        """
+        if template_dir is None:
+            # Use default templates directory
+            template_dir = Path(__file__).parent / "templates"
+
+        self.template_dir = Path(template_dir)
+        self.env = Environment(
+            loader=FileSystemLoader(str(self.template_dir)),
+            autoescape=select_autoescape(["html", "xml"]),
+        )
+        self.chart_gen = ChartGenerator()
+
+    def generate_simulator_dashboard(
+        self,
+        results: SimulationResults,
+        output_path: str,
+        title: str = "Evaluation Dashboard",
+    ) -> str:
+        """
+        Generate a complete HTML dashboard for simulator results.
+
+        Args:
+            results: SimulationResults object
+            output_path: Path to save the HTML file
+            title: Dashboard title
+
+        Returns:
+            Path to the generated HTML file
+        """
+        logger.info(
+            f"[DashboardGenerator] Generating simulator dashboard: {output_path}"
+        )
+
+        # Generate all charts
+        charts = {
+            "score_trend": self.chart_gen.create_score_trend(results),
+            "provider_comparison": self.chart_gen.create_provider_comparison(results),
+            "summary_metrics": self.chart_gen.create_summary_metrics(results),
+        }
+
+        # Add distribution charts for each provider
+        if results.average_scores:
+            for provider in results.average_scores.keys():
+                if provider not in ["processing_time", "guardrail", "metadata"]:
+                    try:
+                        charts[f"distribution_{provider}"] = (
+                            self.chart_gen.create_score_distribution(results, provider)
+                        )
+                    except Exception as e:
+                        logger.warning(
+                            f"Failed to create distribution chart for {provider}: {e}"
+                        )
+
+        # Add timeline chart
+        try:
+            charts["timeline"] = self.chart_gen.create_interaction_timeline(results)
+        except Exception as e:
+            logger.warning(f"Failed to create timeline chart: {e}")
+
+        # Convert charts to HTML
+        chart_htmls = {}
+        for name, fig in charts.items():
+            chart_htmls[name] = fig.to_html(
+                include_plotlyjs="cdn",
+                div_id=f"chart_{name}",
+                config={"responsive": True},
+            )
+
+        # Calculate summary statistics
+        summary_stats = self._create_summary_stats(results)
+
+        # Prepare context for template
+        context = {
+            "title": title,
+            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "summary_stats": summary_stats,
+            "charts": chart_htmls,
+            "results": results,
+            "has_evaluation_summary": bool(results.evaluation_summary),
+        }
+
+        # Render template
+        html_content = self._render_template("simulator_dashboard.html", context)
+
+        # Save to file
+        output_file = Path(output_path)
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        output_file.write_text(html_content, encoding="utf-8")
+
+        logger.info(f"[DashboardGenerator] Dashboard saved to: {output_path}")
+        return str(output_file.absolute())
+
+    def generate_comparator_dashboard(
+        self,
+        results: Dict[str, Any],
+        output_path: str,
+        title: str = "Comparison Dashboard",
+    ) -> str:
+        """
+        Generate a complete HTML dashboard for comparator results.
+
+        Args:
+            results: Comparator results dictionary
+            output_path: Path to save the HTML file
+            title: Dashboard title
+
+        Returns:
+            Path to the generated HTML file
+        """
+        logger.info(
+            f"[DashboardGenerator] Generating comparator dashboard: {output_path}"
+        )
+
+        # Generate heatmap
+        heatmap = self.chart_gen.create_metadata_heatmap(results)
+
+        # Convert to HTML
+        chart_html = heatmap.to_html(
+            include_plotlyjs="cdn", div_id="chart_heatmap", config={"responsive": True}
+        )
+
+        # Prepare context
+        context = {
+            "title": title,
+            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "chart_html": chart_html,
+            "results": results,
+        }
+
+        # Render template
+        html_content = self._render_template("comparator_dashboard.html", context)
+
+        # Save to file
+        output_file = Path(output_path)
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        output_file.write_text(html_content, encoding="utf-8")
+
+        logger.info(f"[DashboardGenerator] Dashboard saved to: {output_path}")
+        return str(output_file.absolute())
+
+    def _create_summary_stats(self, results: SimulationResults) -> Dict[str, Any]:
+        """
+        Extract key summary statistics from results.
+
+        Args:
+            results: SimulationResults object
+
+        Returns:
+            Dictionary of summary statistics
+        """
+        stats = {
+            "total_scripts": len(results.interaction_results or []),
+            "total_time": results.elapsed_time,
+            "started_at": results.started_at.strftime("%Y-%m-%d %H:%M:%S"),
+            "finished_at": results.finished_at.strftime("%Y-%m-%d %H:%M:%S"),
+            "average_scores": results.average_scores or {},
+            "providers": list(results.average_scores.keys())
+            if results.average_scores
+            else [],
+        }
+
+        # Calculate overall average (excluding non-score metrics)
+        score_values = [
+            v
+            for k, v in (results.average_scores or {}).items()
+            if k not in ["processing_time", "guardrail", "metadata"]
+        ]
+        stats["overall_average"] = (
+            sum(score_values) / len(score_values) if score_values else 0.0
+        )
+
+        return stats
+
+    def _render_template(self, template_name: str, context: Dict[str, Any]) -> str:
+        """
+        Render a Jinja2 template with the given context.
+
+        Args:
+            template_name: Name of the template file
+            context: Template context dictionary
+
+        Returns:
+            Rendered HTML string
+        """
+        try:
+            template = self.env.get_template(template_name)
+            return template.render(**context)
+        except Exception as e:
+            logger.error(f"[DashboardGenerator] Template rendering failed: {e}")
+            # Return a basic HTML fallback
+            return self._create_fallback_html(context)
+
+    def _create_fallback_html(self, context: Dict[str, Any]) -> str:
+        """Create a basic HTML fallback if template rendering fails."""
+        return f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>{context.get("title", "Evaluation Dashboard")}</title>
+            <meta charset="utf-8">
+            <meta name="viewport" content="width=device-width, initial-scale=1">
+            <style>
+                body {{ font-family: Arial, sans-serif; margin: 20px; }}
+                .container {{ max-width: 1200px; margin: 0 auto; }}
+                h1 {{ color: #333; }}
+                .chart {{ margin: 20px 0; }}
+            </style>
+        </head>
+        <body>
+            <div class="container">
+                <h1>{context.get("title", "Evaluation Dashboard")}</h1>
+                <p>Generated at: {context.get("generated_at", "N/A")}</p>
+                <div class="charts">
+                    {"".join(context.get("charts", {}).values())}
+                </div>
+            </div>
+        </body>
+        </html>
+        """