evalvault 1.72.0__py3-none-any.whl → 1.73.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the versions as they appear in their public registries.
Files changed (28)
  1. evalvault/adapters/inbound/api/routers/pipeline.py +6 -0
  2. evalvault/adapters/inbound/cli/commands/analyze.py +40 -1
  3. evalvault/adapters/inbound/cli/commands/pipeline.py +100 -0
  4. evalvault/adapters/inbound/cli/commands/regress.py +96 -0
  5. evalvault/adapters/inbound/cli/commands/stage.py +217 -24
  6. evalvault/adapters/outbound/analysis/__init__.py +4 -0
  7. evalvault/adapters/outbound/analysis/dataset_feature_analyzer_module.py +458 -0
  8. evalvault/adapters/outbound/analysis/pipeline_factory.py +1 -0
  9. evalvault/adapters/outbound/analysis/statistical_adapter.py +12 -6
  10. evalvault/adapters/outbound/improvement/pattern_detector.py +4 -0
  11. evalvault/adapters/outbound/storage/base_sql.py +160 -0
  12. evalvault/adapters/outbound/storage/postgres_adapter.py +132 -8
  13. evalvault/adapters/outbound/storage/postgres_schema.sql +15 -0
  14. evalvault/adapters/outbound/storage/schema.sql +18 -1
  15. evalvault/adapters/outbound/storage/sqlite_adapter.py +115 -1
  16. evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +23 -1
  17. evalvault/config/settings.py +2 -1
  18. evalvault/domain/entities/analysis.py +1 -0
  19. evalvault/domain/entities/analysis_pipeline.py +1 -0
  20. evalvault/domain/entities/stage.py +13 -0
  21. evalvault/domain/services/intent_classifier.py +13 -0
  22. evalvault/domain/services/pipeline_template_registry.py +22 -0
  23. evalvault/ports/outbound/storage_port.py +32 -0
  24. {evalvault-1.72.0.dist-info → evalvault-1.73.0.dist-info}/METADATA +2 -1
  25. {evalvault-1.72.0.dist-info → evalvault-1.73.0.dist-info}/RECORD +28 -27
  26. {evalvault-1.72.0.dist-info → evalvault-1.73.0.dist-info}/WHEEL +0 -0
  27. {evalvault-1.72.0.dist-info → evalvault-1.73.0.dist-info}/entry_points.txt +0 -0
  28. {evalvault-1.72.0.dist-info → evalvault-1.73.0.dist-info}/licenses/LICENSE.md +0 -0
evalvault/domain/services/intent_classifier.py
@@ -84,6 +84,12 @@ INTENT_CATALOG = {
         "description": "질문/답변 텍스트를 분석합니다.",
         "sample_query": "텍스트 분석해줘",
     },
+    AnalysisIntent.ANALYZE_DATASET_FEATURES: {
+        "label": "데이터셋 특성 분석",
+        "category": "analysis",
+        "description": "질문/답변/컨텍스트 특성을 추출하고 메트릭 상관을 분석합니다.",
+        "sample_query": "데이터셋 특성 분석해줘",
+    },
     AnalysisIntent.ANALYZE_CAUSAL: {
         "label": "인과 관계 분석",
         "category": "analysis",
evalvault/adapters/inbound/cli/commands/analyze.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import json
 from pathlib import Path
+from typing import Any
 
 import typer
 from rich.console import Console
@@ -21,6 +22,7 @@ from evalvault.adapters.outbound.analysis import (
 from evalvault.adapters.outbound.analysis.pipeline_factory import (
     build_analysis_pipeline_service,
 )
+from evalvault.adapters.outbound.analysis.pipeline_helpers import to_serializable
 from evalvault.adapters.outbound.cache import MemoryCacheAdapter
 from evalvault.adapters.outbound.llm import get_llm_adapter
 from evalvault.adapters.outbound.report import DashboardGenerator, MarkdownReportAdapter
@@ -102,6 +104,9 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
         report: Path | None = typer.Option(
             None, "--report", "-r", help="리포트 출력 파일 (*.md 또는 *.html)"
         ),
+        excel_output: Path | None = typer.Option(
+            None, "--excel-output", help="분석 결과 Excel 출력 경로"
+        ),
         save: bool = typer.Option(False, "--save", "-S", help="분석 결과 DB 저장"),
         db_path: Path | None = db_option(help_text="DB 경로"),
         profile: str | None = profile_option(
@@ -194,8 +199,24 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             stage_metrics=stage_metrics,
         )
 
-        if save:
+        def _save_analysis_payload(payload: Any, analysis_type: str) -> None:
+            serialized = to_serializable(payload)
+            if not isinstance(serialized, dict):
+                serialized = {"value": serialized}
+            storage.save_analysis_result(
+                run_id=run_id,
+                analysis_type=analysis_type,
+                result_data=serialized,
+            )
+
+        if save or excel_output:
             storage.save_analysis(analysis)
+            if bundle.nlp is not None:
+                storage.save_nlp_analysis(bundle.nlp)
+            if bundle.causal is not None:
+                _save_analysis_payload(bundle.causal, "causal")
+            if improvement_report is not None:
+                _save_analysis_payload(improvement_report, "playbook")
             _console.print(f"\n[green]분석 결과 DB 저장: {resolved_db_path}[/green]")
 
         if dashboard:
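Note the guard changed from `if save:` to `if save or excel_output:`; requesting an Excel export forces a DB save first, presumably because the exporter reads persisted rows back out by run_id. A standalone sketch of the normalization pattern the new `_save_analysis_payload` helper applies (`to_serializable` and `save_analysis_result` are real names from this diff; the wrapper below is our restatement, not package code):

    from typing import Any

    from evalvault.adapters.outbound.analysis.pipeline_helpers import to_serializable

    def save_payload(storage, run_id: str, payload: Any, analysis_type: str):
        # Coerce any payload into a dict so every analysis type can share the
        # same result_data storage shape (mirrors _save_analysis_payload above).
        serialized = to_serializable(payload)
        if not isinstance(serialized, dict):
            serialized = {"value": serialized}
        return storage.save_analysis_result(
            run_id=run_id,
            analysis_type=analysis_type,
            result_data=serialized,
        )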
@@ -211,6 +232,8 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             fig.savefig(output_path, dpi=300, bbox_inches="tight")
             _console.print(f"\n[green]Dashboard saved to: {output_path}[/green]")
 
+        anomaly_result = None
+        forecast_result = None
         if anomaly_detect or forecast:
             ts_analyzer = TimeSeriesAdvancedModule(window_size=window_size)
             run_history = storage.list_runs(limit=50)
@@ -241,6 +264,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             )
             _display_forecast_result(forecast_result)
 
+        net_result = None
         if network:
             _console.print("\n[bold cyan]Building metric correlation network...[/bold cyan]")
             net_analyzer = NetworkAnalyzerModule()
@@ -264,6 +288,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             net_result = net_analyzer.analyze_metric_network(graph)
             _display_network_analysis(net_result)
 
+        hypotheses = None
         if generate_hypothesis:
             _console.print(
                 f"\n[bold cyan]Generating hypotheses ({hypothesis_method})...[/bold cyan]"
@@ -289,6 +314,16 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             )
             _display_hypothesis_generation(hypotheses, hypothesis_method)
 
+        if save or excel_output:
+            if anomaly_result is not None:
+                _save_analysis_payload(anomaly_result, "time_series_anomaly")
+            if forecast_result is not None:
+                _save_analysis_payload(forecast_result, "time_series_forecast")
+            if net_result is not None:
+                _save_analysis_payload(net_result, "network")
+            if hypotheses is not None:
+                _save_analysis_payload(hypotheses, "hypotheses")
+
         if output:
             _export_analysis_json(analysis, output, bundle.nlp if nlp else None, improvement_report)
             _console.print(f"\n[green]분석 결과 내보냄: {output}[/green]")
@@ -297,6 +332,10 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             _generate_report(bundle, report, include_nlp=nlp, improvement_report=improvement_report)
             _console.print(f"\n[green]리포트 생성: {report}[/green]")
 
+        if excel_output:
+            exported = storage.export_analysis_results_to_excel(run_id, excel_output)
+            _console.print(f"\n[green]Excel 생성: {exported}[/green]")
+
     @app.command(name="analyze-compare")
     @app.command(name="compare-analysis")
     def analyze_compare(
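With the hunks above, `--save` and `--excel-output` first persist results and then export them per run. A storage-level sketch of the export call (the method name and argument order appear in this diff; we assume it lives on SQLiteStorageAdapter, whose module gains 115 lines in this release; the db path, run id, and output path are hypothetical):

    from pathlib import Path

    from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter

    # Hypothetical paths and run id, for illustration only.
    storage = SQLiteStorageAdapter(db_path=Path("evalvault.db"))
    exported = storage.export_analysis_results_to_excel("run-1234", Path("analysis.xlsx"))
    print(f"Excel written to {exported}")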
evalvault/adapters/inbound/cli/commands/pipeline.py
@@ -3,7 +3,9 @@
 from __future__ import annotations
 
 import json
+from datetime import datetime
 from pathlib import Path
+from uuid import uuid4
 
 import typer
 from rich.panel import Panel
@@ -37,6 +39,11 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
             "-o",
             help="Output file for results (JSON format).",
         ),
+        excel_output: Path | None = typer.Option(
+            None,
+            "--excel-output",
+            help="분석 결과 Excel 출력 경로",
+        ),
         db_path: Path | None = db_option(help_text="Path to database file."),
     ) -> None:
         """Analyze evaluation results using natural language query."""
@@ -73,6 +80,8 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
         result = service.analyze(query, run_id=run_id)
 
         saved_analysis_id: str | None = None
+        saved_dataset_features_id: str | None = None
+        saved_additional_ids: list[str] = []
         stats_node = result.get_node_result("statistical_analyzer")
         if stats_node and isinstance(stats_node.output, dict):
             analysis_obj = stats_node.output.get("analysis")
@@ -84,12 +93,92 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
                     f"[yellow]Warning: Failed to store analysis result ({exc})[/yellow]"
                 )
 
+        dataset_node = result.get_node_result("dataset_feature_analysis")
+        if dataset_node and isinstance(dataset_node.output, dict):
+            dataset_run_id = None
+            summary = dataset_node.output.get("summary")
+            if isinstance(summary, dict):
+                dataset_run_id = summary.get("run_id")
+            resolved_run_id = dataset_run_id or run_id
+            if resolved_run_id:
+                try:
+                    saved_dataset_features_id = storage.save_dataset_feature_analysis(
+                        run_id=resolved_run_id,
+                        result_data=dataset_node.output,
+                    )
+                except Exception as exc:  # pragma: no cover - best effort for CLI UX
+                    console.print(
+                        "[yellow]Warning: Failed to store dataset feature analysis "
+                        f"({exc})[/yellow]"
+                    )
+
+        skip_nodes = {
+            "load_data",
+            "load_runs",
+            "load_run",
+            "statistical_analyzer",
+            "dataset_feature_analysis",
+        }
+        for node_id, node_result in result.node_results.items():
+            if node_id in skip_nodes:
+                continue
+            if not isinstance(node_result.output, dict) or not node_result.output:
+                continue
+            resolved_run_id = run_id
+            if resolved_run_id is None:
+                summary = (
+                    node_result.output.get("summary")
+                    if isinstance(node_result.output, dict)
+                    else None
+                )
+                if isinstance(summary, dict) and summary.get("run_id"):
+                    resolved_run_id = summary.get("run_id")
+                elif node_result.output.get("run_id"):
+                    resolved_run_id = node_result.output.get("run_id")
+            if not resolved_run_id:
+                continue
+            try:
+                saved_id = storage.save_analysis_result(
+                    run_id=resolved_run_id,
+                    analysis_type=node_id,
+                    result_data=node_result.output,
+                )
+                saved_additional_ids.append(saved_id)
+            except Exception as exc:  # pragma: no cover - best effort for CLI UX
+                console.print(
+                    f"[yellow]Warning: Failed to store {node_id} analysis ({exc})[/yellow]"
+                )
+
+        try:
+            record = serialize_pipeline_result(result)
+            record.update(
+                {
+                    "result_id": str(uuid4()),
+                    "intent": result.intent.value if result.intent else None,
+                    "query": query,
+                    "run_id": run_id,
+                    "pipeline_id": result.pipeline_id,
+                    "created_at": datetime.now().isoformat(),
+                }
+            )
+            storage.save_pipeline_result(record)
+        except Exception as exc:  # pragma: no cover - best effort for CLI UX
+            console.print(f"[yellow]Warning: Failed to store pipeline result ({exc})[/yellow]")
+
         if result.is_complete:
             console.print("[green]Pipeline completed successfully![/green]")
             console.print(f"Duration: {result.total_duration_ms}ms")
             console.print(f"Nodes executed: {len(result.node_results)}")
             if saved_analysis_id:
                 console.print(f"Analysis saved as [blue]{saved_analysis_id}[/blue]")
+            if saved_dataset_features_id:
+                console.print(
+                    f"Dataset feature analysis saved as [blue]{saved_dataset_features_id}[/blue]"
+                )
+            if saved_additional_ids:
+                console.print(
+                    f"Additional analysis saved: [blue]{len(saved_additional_ids)}[/blue] entries"
+                )
 
         if result.final_output:
             console.print("\n[bold]Results:[/bold]")
@@ -111,6 +200,16 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
                 json.dump(payload, f, ensure_ascii=False, indent=2)
             console.print(f"\n[green]Results saved to {output}[/green]")
 
+        if excel_output:
+            if not run_id:
+                console.print("[yellow]Warning: run_id is required for Excel export.[/yellow]")
+            else:
+                try:
+                    exported = storage.export_analysis_results_to_excel(run_id, excel_output)
+                    console.print(f"\n[green]Excel saved to {exported}[/green]")
+                except Exception as exc:  # pragma: no cover - best effort for CLI UX
+                    console.print(f"[yellow]Warning: Excel export failed ({exc})[/yellow]")
+
 
         console.print()
 
     @pipeline_app.command("intents")
@@ -139,6 +238,7 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
         AnalysisIntent.ANALYZE_PATTERNS: ("Analysis", "패턴 분석"),
         AnalysisIntent.ANALYZE_TRENDS: ("Analysis", "추세 분석"),
         AnalysisIntent.BENCHMARK_RETRIEVAL: ("Benchmark", "검색 벤치마크"),
+        AnalysisIntent.ANALYZE_DATASET_FEATURES: ("Analysis", "데이터셋 특성 분석"),
         AnalysisIntent.GENERATE_SUMMARY: ("Report", "요약 보고서 생성"),
         AnalysisIntent.GENERATE_DETAILED: ("Report", "상세 보고서 생성"),
         AnalysisIntent.GENERATE_COMPARISON: ("Report", "비교 보고서 생성"),
evalvault/adapters/inbound/cli/commands/regress.py
@@ -368,6 +368,102 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
         if not gate_passed and fail_on_regression:
             raise typer.Exit(2)
 
+    @app.command(name="regress-baseline")
+    def regress_baseline(
+        action: str = typer.Argument(
+            ...,
+            help="Action: 'set' to save baseline, 'get' to retrieve baseline run_id.",
+        ),
+        baseline_key: str = typer.Option(
+            "default",
+            "--key",
+            "-k",
+            help="Baseline key identifier (default: 'default').",
+        ),
+        run_id: str | None = typer.Option(
+            None,
+            "--run-id",
+            "-r",
+            help="Run ID to set as baseline (required for 'set').",
+        ),
+        dataset_name: str | None = typer.Option(
+            None,
+            "--dataset",
+            help="Dataset name for the baseline.",
+        ),
+        branch: str | None = typer.Option(
+            None,
+            "--branch",
+            help="Git branch name.",
+        ),
+        commit_sha: str | None = typer.Option(
+            None,
+            "--commit",
+            help="Git commit SHA.",
+        ),
+        output_format: str = typer.Option(
+            "text",
+            "--format",
+            "-f",
+            help="Output format: text, json.",
+        ),
+        db_path: Path | None = db_option(default=None, help_text="Database path"),
+    ) -> None:
+        """Manage regression baselines for CI/CD integration."""
+        if db_path is None:
+            console.print("[red]Error:[/red] Database path is not configured.")
+            raise typer.Exit(1)
+
+        storage = SQLiteStorageAdapter(db_path=db_path)
+
+        if action == "set":
+            if not run_id:
+                console.print("[red]Error:[/red] --run-id is required for 'set' action.")
+                raise typer.Exit(1)
+            try:
+                storage.get_run(run_id)
+            except KeyError:
+                console.print(f"[red]Error:[/red] Run not found: {run_id}")
+                raise typer.Exit(1)
+
+            storage.set_regression_baseline(
+                baseline_key,
+                run_id,
+                dataset_name=dataset_name,
+                branch=branch,
+                commit_sha=commit_sha,
+            )
+            if output_format == "json":
+                console.print(
+                    json.dumps(
+                        {"status": "ok", "baseline_key": baseline_key, "run_id": run_id},
+                        ensure_ascii=False,
+                    )
+                )
+            else:
+                console.print(f"[green]Baseline '{baseline_key}' set to run_id: {run_id}[/green]")
+        elif action == "get":
+            baseline = storage.get_regression_baseline(baseline_key)
+            if not baseline:
+                if output_format == "json":
+                    console.print(
+                        json.dumps(
+                            {"status": "not_found", "baseline_key": baseline_key},
+                            ensure_ascii=False,
+                        )
+                    )
+                else:
+                    console.print(f"[yellow]Baseline '{baseline_key}' not found.[/yellow]")
+                raise typer.Exit(1)
+
+            if output_format == "json":
+                console.print(json.dumps(baseline, ensure_ascii=False, indent=2, default=str))
+            else:
+                console.print(baseline["run_id"])
+        else:
+            console.print(f"[red]Error:[/red] Unknown action: {action}. Use 'set' or 'get'.")
+            raise typer.Exit(1)
+
 
 def _render_table(report: RegressionGateReport, console: Console) -> None:
     console.print(f"\n[bold]Regression Gate Check: {report.candidate_run_id}[/bold]\n")