evalvault-1.72.1-py3-none-any.whl → evalvault-1.73.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
- evalvault/adapters/inbound/api/routers/pipeline.py +6 -0
- evalvault/adapters/inbound/cli/commands/analyze.py +40 -1
- evalvault/adapters/inbound/cli/commands/pipeline.py +100 -0
- evalvault/adapters/inbound/cli/commands/regress.py +96 -0
- evalvault/adapters/inbound/cli/commands/stage.py +217 -24
- evalvault/adapters/outbound/analysis/__init__.py +4 -0
- evalvault/adapters/outbound/analysis/dataset_feature_analyzer_module.py +458 -0
- evalvault/adapters/outbound/analysis/pipeline_factory.py +1 -0
- evalvault/adapters/outbound/analysis/statistical_adapter.py +12 -6
- evalvault/adapters/outbound/improvement/pattern_detector.py +4 -0
- evalvault/adapters/outbound/storage/base_sql.py +160 -0
- evalvault/adapters/outbound/storage/postgres_adapter.py +132 -8
- evalvault/adapters/outbound/storage/postgres_schema.sql +15 -0
- evalvault/adapters/outbound/storage/schema.sql +18 -1
- evalvault/adapters/outbound/storage/sqlite_adapter.py +115 -1
- evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +23 -1
- evalvault/domain/entities/analysis.py +1 -0
- evalvault/domain/entities/analysis_pipeline.py +1 -0
- evalvault/domain/entities/stage.py +13 -0
- evalvault/domain/services/intent_classifier.py +13 -0
- evalvault/domain/services/pipeline_template_registry.py +22 -0
- evalvault/ports/outbound/storage_port.py +32 -0
- {evalvault-1.72.1.dist-info → evalvault-1.73.0.dist-info}/METADATA +2 -1
- {evalvault-1.72.1.dist-info → evalvault-1.73.0.dist-info}/RECORD +27 -26
- {evalvault-1.72.1.dist-info → evalvault-1.73.0.dist-info}/WHEEL +0 -0
- {evalvault-1.72.1.dist-info → evalvault-1.73.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.72.1.dist-info → evalvault-1.73.0.dist-info}/licenses/LICENSE.md +0 -0
evalvault/domain/services/intent_classifier.py

@@ -84,6 +84,12 @@ INTENT_CATALOG = {
         "description": "질문/답변 텍스트를 분석합니다.",
         "sample_query": "텍스트 분석해줘",
     },
+    AnalysisIntent.ANALYZE_DATASET_FEATURES: {
+        "label": "데이터셋 특성 분석",
+        "category": "analysis",
+        "description": "질문/답변/컨텍스트 특성을 추출하고 메트릭 상관을 분석합니다.",
+        "sample_query": "데이터셋 특성 분석해줘",
+    },
     AnalysisIntent.ANALYZE_CAUSAL: {
         "label": "인과 관계 분석",
         "category": "analysis",
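This entry registers the new ANALYZE_DATASET_FEATURES intent in the catalog; the Korean strings give it the label "dataset feature analysis", a description ("extracts question/answer/context features and analyzes metric correlations"), and the sample query "analyze the dataset features". A minimal sketch of how a catalog like this can back intent routing; the `classify` matcher below is illustrative only, since the package's real classifier logic is not part of this diff, and it assumes INTENT_CATALOG lives in intent_classifier as reconstructed above:

from evalvault.domain.services.intent_classifier import INTENT_CATALOG  # assumed home of the catalog

def classify(query: str):
    # Illustrative matcher: return the first intent whose label or
    # sample_query appears in the user's query, else None.
    for intent, meta in INTENT_CATALOG.items():
        if meta["label"] in query or meta["sample_query"] in query:
            return intent
    return None

print(classify("데이터셋 특성 분석해줘"))  # AnalysisIntent.ANALYZE_DATASET_FEATURES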
evalvault/adapters/inbound/cli/commands/analyze.py

@@ -4,6 +4,7 @@ from __future__ import annotations

 import json
 from pathlib import Path
+from typing import Any

 import typer
 from rich.console import Console
@@ -21,6 +22,7 @@ from evalvault.adapters.outbound.analysis import (
 from evalvault.adapters.outbound.analysis.pipeline_factory import (
     build_analysis_pipeline_service,
 )
+from evalvault.adapters.outbound.analysis.pipeline_helpers import to_serializable
 from evalvault.adapters.outbound.cache import MemoryCacheAdapter
 from evalvault.adapters.outbound.llm import get_llm_adapter
 from evalvault.adapters.outbound.report import DashboardGenerator, MarkdownReportAdapter
@@ -102,6 +104,9 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
         report: Path | None = typer.Option(
             None, "--report", "-r", help="리포트 출력 파일 (*.md 또는 *.html)"
         ),
+        excel_output: Path | None = typer.Option(
+            None, "--excel-output", help="분석 결과 Excel 출력 경로"
+        ),
         save: bool = typer.Option(False, "--save", "-S", help="분석 결과 DB 저장"),
         db_path: Path | None = db_option(help_text="DB 경로"),
         profile: str | None = profile_option(
@@ -194,8 +199,24 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             stage_metrics=stage_metrics,
         )

-        if save:
+        def _save_analysis_payload(payload: Any, analysis_type: str) -> None:
+            serialized = to_serializable(payload)
+            if not isinstance(serialized, dict):
+                serialized = {"value": serialized}
+            storage.save_analysis_result(
+                run_id=run_id,
+                analysis_type=analysis_type,
+                result_data=serialized,
+            )
+
+        if save or excel_output:
             storage.save_analysis(analysis)
+            if bundle.nlp is not None:
+                storage.save_nlp_analysis(bundle.nlp)
+            if bundle.causal is not None:
+                _save_analysis_payload(bundle.causal, "causal")
+            if improvement_report is not None:
+                _save_analysis_payload(improvement_report, "playbook")
             _console.print(f"\n[green]분석 결과 DB 저장: {resolved_db_path}[/green]")

         if dashboard:
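The nested `_save_analysis_payload` helper gives the optional artifacts (causal analysis, the improvement playbook, and later the time-series, network, and hypothesis results) a single persistence path: normalize with `to_serializable`, wrap non-dict results as `{"value": ...}`, then store via `save_analysis_result`. A self-contained sketch of that normalization contract; the `to_serializable` body below is an assumption, since the real helper in `pipeline_helpers` is not shown in this diff:

from dataclasses import asdict, is_dataclass
from typing import Any

def to_serializable(payload: Any) -> Any:
    # Assumed behavior of pipeline_helpers.to_serializable: flatten
    # dataclass instances and sequences into JSON-friendly values.
    if is_dataclass(payload) and not isinstance(payload, type):
        return asdict(payload)
    if isinstance(payload, (list, tuple)):
        return [to_serializable(item) for item in payload]
    return payload

def normalize(payload: Any) -> dict:
    # Mirrors the CLI helper: every saved row ends up as a JSON object.
    serialized = to_serializable(payload)
    return serialized if isinstance(serialized, dict) else {"value": serialized}

assert normalize({"score": 0.8}) == {"score": 0.8}
assert normalize([0.2, 0.8]) == {"value": [0.2, 0.8]}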
@@ -211,6 +232,8 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             fig.savefig(output_path, dpi=300, bbox_inches="tight")
             _console.print(f"\n[green]Dashboard saved to: {output_path}[/green]")

+        anomaly_result = None
+        forecast_result = None
         if anomaly_detect or forecast:
             ts_analyzer = TimeSeriesAdvancedModule(window_size=window_size)
             run_history = storage.list_runs(limit=50)
@@ -241,6 +264,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             )
             _display_forecast_result(forecast_result)

+        net_result = None
         if network:
             _console.print("\n[bold cyan]Building metric correlation network...[/bold cyan]")
             net_analyzer = NetworkAnalyzerModule()
@@ -264,6 +288,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             net_result = net_analyzer.analyze_metric_network(graph)
             _display_network_analysis(net_result)

+        hypotheses = None
         if generate_hypothesis:
             _console.print(
                 f"\n[bold cyan]Generating hypotheses ({hypothesis_method})...[/bold cyan]"
@@ -289,6 +314,16 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             )
             _display_hypothesis_generation(hypotheses, hypothesis_method)

+        if save or excel_output:
+            if anomaly_result is not None:
+                _save_analysis_payload(anomaly_result, "time_series_anomaly")
+            if forecast_result is not None:
+                _save_analysis_payload(forecast_result, "time_series_forecast")
+            if net_result is not None:
+                _save_analysis_payload(net_result, "network")
+            if hypotheses is not None:
+                _save_analysis_payload(hypotheses, "hypotheses")
+
         if output:
             _export_analysis_json(analysis, output, bundle.nlp if nlp else None, improvement_report)
             _console.print(f"\n[green]분석 결과 내보냄: {output}[/green]")
@@ -297,6 +332,10 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             _generate_report(bundle, report, include_nlp=nlp, improvement_report=improvement_report)
             _console.print(f"\n[green]리포트 생성: {report}[/green]")

+        if excel_output:
+            exported = storage.export_analysis_results_to_excel(run_id, excel_output)
+            _console.print(f"\n[green]Excel 생성: {exported}[/green]")
+
     @app.command(name="analyze-compare")
     @app.command(name="compare-analysis")
     def analyze_compare(
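The new `--excel-output` flag closes the loop on the rows saved above: once a run's analyses are stored, `export_analysis_results_to_excel(run_id, path)` (called here on the storage adapter, presumably via the storage-port additions listed in the file summary) collects them into a single workbook. A hedged usage sketch; the database path, run ID, and output file below are placeholders:

from pathlib import Path

from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter

storage = SQLiteStorageAdapter(db_path=Path("evalvault.db"))
# Gathers the stored analysis rows for the run into one Excel file and
# returns the written path, mirroring what --excel-output does in the CLI.
exported = storage.export_analysis_results_to_excel("run-123", Path("analysis.xlsx"))
print(exported)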
evalvault/adapters/inbound/cli/commands/pipeline.py

@@ -3,7 +3,9 @@
 from __future__ import annotations

 import json
+from datetime import datetime
 from pathlib import Path
+from uuid import uuid4

 import typer
 from rich.panel import Panel
@@ -37,6 +39,11 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
             "-o",
             help="Output file for results (JSON format).",
         ),
+        excel_output: Path | None = typer.Option(
+            None,
+            "--excel-output",
+            help="분석 결과 Excel 출력 경로",
+        ),
         db_path: Path | None = db_option(help_text="Path to database file."),
     ) -> None:
         """Analyze evaluation results using natural language query."""
@@ -73,6 +80,8 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
         result = service.analyze(query, run_id=run_id)

         saved_analysis_id: str | None = None
+        saved_dataset_features_id: str | None = None
+        saved_additional_ids: list[str] = []
         stats_node = result.get_node_result("statistical_analyzer")
         if stats_node and isinstance(stats_node.output, dict):
             analysis_obj = stats_node.output.get("analysis")
@@ -84,12 +93,92 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
                     f"[yellow]Warning: Failed to store analysis result ({exc})[/yellow]"
                 )

+        dataset_node = result.get_node_result("dataset_feature_analysis")
+        if dataset_node and isinstance(dataset_node.output, dict):
+            dataset_run_id = None
+            summary = dataset_node.output.get("summary")
+            if isinstance(summary, dict):
+                dataset_run_id = summary.get("run_id")
+            resolved_run_id = dataset_run_id or run_id
+            if resolved_run_id:
+                try:
+                    saved_dataset_features_id = storage.save_dataset_feature_analysis(
+                        run_id=resolved_run_id,
+                        result_data=dataset_node.output,
+                    )
+                except Exception as exc:  # pragma: no cover - best effort for CLI UX
+                    console.print(
+                        "[yellow]Warning: Failed to store dataset feature analysis "
+                        f"({exc})[/yellow]"
+                    )
+
+        skip_nodes = {
+            "load_data",
+            "load_runs",
+            "load_run",
+            "statistical_analyzer",
+            "dataset_feature_analysis",
+        }
+        for node_id, node_result in result.node_results.items():
+            if node_id in skip_nodes:
+                continue
+            if not isinstance(node_result.output, dict) or not node_result.output:
+                continue
+            resolved_run_id = run_id
+            if resolved_run_id is None:
+                summary = (
+                    node_result.output.get("summary")
+                    if isinstance(node_result.output, dict)
+                    else None
+                )
+                if isinstance(summary, dict) and summary.get("run_id"):
+                    resolved_run_id = summary.get("run_id")
+                elif node_result.output.get("run_id"):
+                    resolved_run_id = node_result.output.get("run_id")
+            if not resolved_run_id:
+                continue
+            try:
+                saved_id = storage.save_analysis_result(
+                    run_id=resolved_run_id,
+                    analysis_type=node_id,
+                    result_data=node_result.output,
+                )
+                saved_additional_ids.append(saved_id)
+            except Exception as exc:  # pragma: no cover - best effort for CLI UX
+                console.print(
+                    f"[yellow]Warning: Failed to store {node_id} analysis ({exc})[/yellow]"
+                )
+
+        try:
+            record = serialize_pipeline_result(result)
+            record.update(
+                {
+                    "result_id": str(uuid4()),
+                    "intent": result.intent.value if result.intent else None,
+                    "query": query,
+                    "run_id": run_id,
+                    "pipeline_id": result.pipeline_id,
+                    "created_at": datetime.now().isoformat(),
+                }
+            )
+            storage.save_pipeline_result(record)
+        except Exception as exc:  # pragma: no cover - best effort for CLI UX
+            console.print(f"[yellow]Warning: Failed to store pipeline result ({exc})[/yellow]")
+
         if result.is_complete:
             console.print("[green]Pipeline completed successfully![/green]")
             console.print(f"Duration: {result.total_duration_ms}ms")
             console.print(f"Nodes executed: {len(result.node_results)}")
             if saved_analysis_id:
                 console.print(f"Analysis saved as [blue]{saved_analysis_id}[/blue]")
+            if saved_dataset_features_id:
+                console.print(
+                    f"Dataset feature analysis saved as [blue]{saved_dataset_features_id}[/blue]"
+                )
+            if saved_additional_ids:
+                console.print(
+                    f"Additional analysis saved: [blue]{len(saved_additional_ids)}[/blue] entries"
+                )

         if result.final_output:
             console.print("\n[bold]Results:[/bold]")
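For every other pipeline node, output is persisted only when a run ID can be resolved, with a three-step fallback: the explicit `run_id` argument, then a `run_id` inside the node's `summary` dict, then a top-level `run_id` key. The same chain, restated as a standalone function (the name `resolve_run_id` is illustrative):

from typing import Any

def resolve_run_id(output: dict[str, Any], explicit: str | None) -> str | None:
    # Mirrors the CLI's fallback chain: explicit flag, then summary.run_id,
    # then a top-level run_id key; returns None when nothing matches.
    if explicit:
        return explicit
    summary = output.get("summary")
    if isinstance(summary, dict) and summary.get("run_id"):
        return summary["run_id"]
    return output.get("run_id") or None

assert resolve_run_id({"summary": {"run_id": "r-1"}}, None) == "r-1"
assert resolve_run_id({"run_id": "r-2"}, None) == "r-2"
assert resolve_run_id({}, "r-3") == "r-3"
assert resolve_run_id({}, None) is None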
@@ -111,6 +200,16 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
                 json.dump(payload, f, ensure_ascii=False, indent=2)
             console.print(f"\n[green]Results saved to {output}[/green]")

+        if excel_output:
+            if not run_id:
+                console.print("[yellow]Warning: run_id is required for Excel export.[/yellow]")
+            else:
+                try:
+                    exported = storage.export_analysis_results_to_excel(run_id, excel_output)
+                    console.print(f"\n[green]Excel saved to {exported}[/green]")
+                except Exception as exc:  # pragma: no cover - best effort for CLI UX
+                    console.print(f"[yellow]Warning: Excel export failed ({exc})[/yellow]")
+
         console.print()

     @pipeline_app.command("intents")
@@ -139,6 +238,7 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
         AnalysisIntent.ANALYZE_PATTERNS: ("Analysis", "패턴 분석"),
         AnalysisIntent.ANALYZE_TRENDS: ("Analysis", "추세 분석"),
         AnalysisIntent.BENCHMARK_RETRIEVAL: ("Benchmark", "검색 벤치마크"),
+        AnalysisIntent.ANALYZE_DATASET_FEATURES: ("Analysis", "데이터셋 특성 분석"),
         AnalysisIntent.GENERATE_SUMMARY: ("Report", "요약 보고서 생성"),
         AnalysisIntent.GENERATE_DETAILED: ("Report", "상세 보고서 생성"),
         AnalysisIntent.GENERATE_COMPARISON: ("Report", "비교 보고서 생성"),
evalvault/adapters/inbound/cli/commands/regress.py

@@ -368,6 +368,102 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
         if not gate_passed and fail_on_regression:
             raise typer.Exit(2)

+    @app.command(name="regress-baseline")
+    def regress_baseline(
+        action: str = typer.Argument(
+            ...,
+            help="Action: 'set' to save baseline, 'get' to retrieve baseline run_id.",
+        ),
+        baseline_key: str = typer.Option(
+            "default",
+            "--key",
+            "-k",
+            help="Baseline key identifier (default: 'default').",
+        ),
+        run_id: str | None = typer.Option(
+            None,
+            "--run-id",
+            "-r",
+            help="Run ID to set as baseline (required for 'set').",
+        ),
+        dataset_name: str | None = typer.Option(
+            None,
+            "--dataset",
+            help="Dataset name for the baseline.",
+        ),
+        branch: str | None = typer.Option(
+            None,
+            "--branch",
+            help="Git branch name.",
+        ),
+        commit_sha: str | None = typer.Option(
+            None,
+            "--commit",
+            help="Git commit SHA.",
+        ),
+        output_format: str = typer.Option(
+            "text",
+            "--format",
+            "-f",
+            help="Output format: text, json.",
+        ),
+        db_path: Path | None = db_option(default=None, help_text="Database path"),
+    ) -> None:
+        """Manage regression baselines for CI/CD integration."""
+        if db_path is None:
+            console.print("[red]Error:[/red] Database path is not configured.")
+            raise typer.Exit(1)
+
+        storage = SQLiteStorageAdapter(db_path=db_path)
+
+        if action == "set":
+            if not run_id:
+                console.print("[red]Error:[/red] --run-id is required for 'set' action.")
+                raise typer.Exit(1)
+            try:
+                storage.get_run(run_id)
+            except KeyError:
+                console.print(f"[red]Error:[/red] Run not found: {run_id}")
+                raise typer.Exit(1)
+
+            storage.set_regression_baseline(
+                baseline_key,
+                run_id,
+                dataset_name=dataset_name,
+                branch=branch,
+                commit_sha=commit_sha,
+            )
+            if output_format == "json":
+                console.print(
+                    json.dumps(
+                        {"status": "ok", "baseline_key": baseline_key, "run_id": run_id},
+                        ensure_ascii=False,
+                    )
+                )
+            else:
+                console.print(f"[green]Baseline '{baseline_key}' set to run_id: {run_id}[/green]")
+        elif action == "get":
+            baseline = storage.get_regression_baseline(baseline_key)
+            if not baseline:
+                if output_format == "json":
+                    console.print(
+                        json.dumps(
+                            {"status": "not_found", "baseline_key": baseline_key},
+                            ensure_ascii=False,
+                        )
+                    )
+                else:
+                    console.print(f"[yellow]Baseline '{baseline_key}' not found.[/yellow]")
+                raise typer.Exit(1)
+
+            if output_format == "json":
+                console.print(json.dumps(baseline, ensure_ascii=False, indent=2, default=str))
+            else:
+                console.print(baseline["run_id"])
+        else:
+            console.print(f"[red]Error:[/red] Unknown action: {action}. Use 'set' or 'get'.")
+            raise typer.Exit(1)
+

 def _render_table(report: RegressionGateReport, console: Console) -> None:
     console.print(f"\n[bold]Regression Gate Check: {report.candidate_run_id}[/bold]\n")