evalvault-1.58.0-py3-none-any.whl → evalvault-1.59.0-py3-none-any.whl

This diff shows the content changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (41)
  1. evalvault/adapters/inbound/api/routers/pipeline.py +48 -0
  2. evalvault/adapters/inbound/cli/commands/analyze.py +258 -2
  3. evalvault/adapters/inbound/cli/commands/pipeline.py +5 -1
  4. evalvault/adapters/inbound/cli/commands/run.py +60 -26
  5. evalvault/adapters/inbound/cli/utils/analysis_io.py +2 -2
  6. evalvault/adapters/outbound/analysis/__init__.py +13 -3
  7. evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +2 -1
  8. evalvault/adapters/outbound/analysis/embedding_searcher_module.py +2 -1
  9. evalvault/adapters/outbound/analysis/hypothesis_generator_module.py +359 -0
  10. evalvault/adapters/outbound/analysis/llm_report_module.py +9 -9
  11. evalvault/adapters/outbound/analysis/network_analyzer_module.py +250 -0
  12. evalvault/adapters/outbound/analysis/pipeline_factory.py +3 -0
  13. evalvault/adapters/outbound/analysis/pipeline_helpers.py +1 -1
  14. evalvault/adapters/outbound/analysis/priority_summary_module.py +1 -1
  15. evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py +3 -2
  16. evalvault/adapters/outbound/analysis/timeseries_advanced_module.py +349 -0
  17. evalvault/adapters/outbound/benchmark/lm_eval_adapter.py +1 -1
  18. evalvault/adapters/outbound/improvement/insight_generator.py +13 -10
  19. evalvault/adapters/outbound/improvement/pattern_detector.py +11 -13
  20. evalvault/adapters/outbound/improvement/playbook_loader.py +3 -3
  21. evalvault/adapters/outbound/llm/__init__.py +63 -63
  22. evalvault/adapters/outbound/llm/instructor_factory.py +101 -7
  23. evalvault/adapters/outbound/llm/ollama_adapter.py +8 -1
  24. evalvault/adapters/outbound/llm/token_aware_chat.py +1 -1
  25. evalvault/adapters/outbound/report/__init__.py +2 -0
  26. evalvault/adapters/outbound/report/dashboard_generator.py +197 -0
  27. evalvault/adapters/outbound/storage/postgres_adapter.py +1 -1
  28. evalvault/adapters/outbound/tracer/open_rag_log_handler.py +3 -3
  29. evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py +3 -3
  30. evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py +4 -4
  31. evalvault/config/settings.py +10 -0
  32. evalvault/domain/entities/analysis_pipeline.py +13 -3
  33. evalvault/domain/services/analysis_service.py +3 -3
  34. evalvault/domain/services/evaluator.py +1 -1
  35. evalvault/domain/services/pipeline_template_registry.py +197 -127
  36. evalvault/domain/services/visual_space_service.py +1 -1
  37. {evalvault-1.58.0.dist-info → evalvault-1.59.0.dist-info}/METADATA +10 -4
  38. {evalvault-1.58.0.dist-info → evalvault-1.59.0.dist-info}/RECORD +41 -37
  39. {evalvault-1.58.0.dist-info → evalvault-1.59.0.dist-info}/WHEEL +0 -0
  40. {evalvault-1.58.0.dist-info → evalvault-1.59.0.dist-info}/entry_points.txt +0 -0
  41. {evalvault-1.58.0.dist-info → evalvault-1.59.0.dist-info}/licenses/LICENSE.md +0 -0
evalvault/adapters/inbound/api/routers/pipeline.py

@@ -71,6 +71,54 @@ INTENT_CATALOG = {
         "description": "시간에 따른 추세를 분석합니다.",
         "sample_query": "메트릭 추세를 분석해줘",
     },
+    AnalysisIntent.ANALYZE_STATISTICAL: {
+        "label": "기술 통계량",
+        "category": "analysis",
+        "description": "메트릭별 기초 통계량을 계산합니다.",
+        "sample_query": "기초 통계 분석해줘",
+    },
+    AnalysisIntent.ANALYZE_NLP: {
+        "label": "NLP 분석",
+        "category": "analysis",
+        "description": "질문/답변 텍스트를 분석합니다.",
+        "sample_query": "텍스트 분석해줘",
+    },
+    AnalysisIntent.ANALYZE_CAUSAL: {
+        "label": "인과 관계 분석",
+        "category": "analysis",
+        "description": "요인별 영향도와 인과 관계를 분석합니다.",
+        "sample_query": "인과 관계 분석해줘",
+    },
+    AnalysisIntent.ANALYZE_NETWORK: {
+        "label": "네트워크 분석",
+        "category": "analysis",
+        "description": "메트릭 간 상관관계 네트워크를 분석합니다.",
+        "sample_query": "메트릭 네트워크 분석해줘",
+    },
+    AnalysisIntent.ANALYZE_PLAYBOOK: {
+        "label": "플레이북 분석",
+        "category": "analysis",
+        "description": "개선 플레이북 기반 진단을 수행합니다.",
+        "sample_query": "플레이북으로 분석해줘",
+    },
+    AnalysisIntent.DETECT_ANOMALIES: {
+        "label": "이상 탐지",
+        "category": "timeseries",
+        "description": "시계열 이상 패턴을 탐지합니다.",
+        "sample_query": "이상 탐지해줘",
+    },
+    AnalysisIntent.FORECAST_PERFORMANCE: {
+        "label": "성능 예측",
+        "category": "timeseries",
+        "description": "미래 성능을 예측합니다.",
+        "sample_query": "성능 예측해줘",
+    },
+    AnalysisIntent.GENERATE_HYPOTHESES: {
+        "label": "가설 생성",
+        "category": "generation",
+        "description": "성능 저하 원인에 대한 가설을 생성합니다.",
+        "sample_query": "가설 생성해줘",
+    },
     AnalysisIntent.BENCHMARK_RETRIEVAL: {
         "label": "검색 벤치마크",
         "category": "benchmark",
evalvault/adapters/inbound/cli/commands/analyze.py

@@ -12,15 +12,18 @@ from rich.table import Table
 
 from evalvault.adapters.outbound.analysis import (
     CausalAnalysisAdapter,
+    HypothesisGeneratorModule,
+    NetworkAnalyzerModule,
     NLPAnalysisAdapter,
     StatisticalAnalysisAdapter,
+    TimeSeriesAdvancedModule,
 )
 from evalvault.adapters.outbound.analysis.pipeline_factory import (
     build_analysis_pipeline_service,
 )
 from evalvault.adapters.outbound.cache import MemoryCacheAdapter
 from evalvault.adapters.outbound.llm import get_llm_adapter
-from evalvault.adapters.outbound.report import MarkdownReportAdapter
+from evalvault.adapters.outbound.report import DashboardGenerator, MarkdownReportAdapter
 from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
 from evalvault.config.phoenix_support import get_phoenix_trace_url
 from evalvault.config.settings import Settings, apply_profile
@@ -64,6 +67,37 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             "-L",
             help="플레이북 분석에서 LLM 인사이트 생성",
         ),
+        dashboard: bool = typer.Option(False, "--dashboard", help="시각화 대시보드 생성"),
+        dashboard_format: str = typer.Option(
+            "png", "--dashboard-format", help="대시보드 출력 형식 (png, svg, pdf)"
+        ),
+        anomaly_detect: bool = typer.Option(
+            False, "--anomaly-detect", "-A", help="이상치 탐지 실행 (Phase 2)"
+        ),
+        window_size: int = typer.Option(
+            200, "--window-size", "-w", help="이상치 탐지 윈도 크기", min=50, max=500
+        ),
+        forecast: bool = typer.Option(False, "--forecast", "-F", help="성능 예측 실행 (Phase 2)"),
+        forecast_horizon: int = typer.Option(
+            3, "--forecast-horizon", help="예측 범위(런 개수)", min=1, max=10
+        ),
+        network: bool = typer.Option(
+            False, "--network", help="메트릭 상관관계 네트워크 생성 (Phase 3)"
+        ),
+        min_correlation: float = typer.Option(
+            0.5, "--min-correlation", help="네트워크 최소 상관계수", min=0, max=1
+        ),
+        generate_hypothesis: bool = typer.Option(
+            False, "--generate-hypothesis", "-H", help="가설 자동 생성 (Phase 4)"
+        ),
+        hypothesis_method: str = typer.Option(
+            "heuristic",
+            "--hypothesis-method",
+            help="가설 생성 방식 (heuristic, hyporefine, union)",
+        ),
+        num_hypotheses: int = typer.Option(
+            5, "--num-hypotheses", help="생성할 가설 수", min=1, max=20
+        ),
         output: Path | None = typer.Option(None, "--output", "-o", help="JSON 출력 파일"),
         report: Path | None = typer.Option(
             None, "--report", "-r", help="리포트 출력 파일 (*.md 또는 *.html)"
@@ -77,6 +111,9 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
         """평가 실행 결과를 분석하고 통계 인사이트를 표시합니다."""
 
         resolved_db_path = db_path or Settings().evalvault_db_path
+        if resolved_db_path is None:
+            _console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
+            raise typer.Exit(1)
         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
 
         try:
@@ -161,6 +198,97 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             storage.save_analysis(analysis)
             _console.print(f"\n[green]분석 결과 DB 저장: {resolved_db_path}[/green]")
 
+        if dashboard:
+            dashboard_gen = DashboardGenerator()
+            _console.print("\n[bold cyan]Generating visualization dashboard...[/bold cyan]")
+
+            fig = dashboard_gen.generate_evaluation_dashboard(run_id)
+
+            output_dir = Path("reports/dashboard")
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            output_path = output_dir / f"dashboard_{run_id[:8]}.{dashboard_format}"
+            fig.savefig(output_path, dpi=300, bbox_inches="tight")
+            _console.print(f"\n[green]Dashboard saved to: {output_path}[/green]")
+
+        if anomaly_detect or forecast:
+            ts_analyzer = TimeSeriesAdvancedModule(window_size=window_size)
+            run_history = storage.list_runs(limit=50)
+
+            if not run_history or len(run_history) < 5:
+                _console.print("[yellow]Need at least 5 runs for time series analysis.[/yellow]")
+            else:
+                if anomaly_detect:
+                    _console.print("\n[bold cyan]Running anomaly detection...[/bold cyan]")
+                    history_data = [
+                        {
+                            "run_id": r.run_id,
+                            "pass_rate": r.pass_rate,
+                            "timestamp": r.started_at,
+                        }
+                        for r in run_history
+                    ]
+                    anomaly_result = ts_analyzer.detect_anomalies(history_data)
+                    _display_anomaly_detection(anomaly_result)
+
+                if forecast:
+                    _console.print("\n[bold cyan]Running performance forecasting...[/bold cyan]")
+                    history_data = [
+                        {"run_id": r.run_id, "pass_rate": r.pass_rate} for r in run_history
+                    ]
+                    forecast_result = ts_analyzer.forecast_performance(
+                        history_data, horizon=forecast_horizon
+                    )
+                    _display_forecast_result(forecast_result)
+
+        if network:
+            _console.print("\n[bold cyan]Building metric correlation network...[/bold cyan]")
+            net_analyzer = NetworkAnalyzerModule()
+
+            if not bundle.statistical or not bundle.statistical.significant_correlations:
+                _console.print("[yellow]No significant correlations for network analysis.[/yellow]")
+            else:
+                correlations_data = [
+                    {
+                        "variable1": corr.variable1,
+                        "variable2": corr.variable2,
+                        "correlation": corr.correlation,
+                        "p_value": corr.p_value,
+                        "is_significant": corr.is_significant,
+                    }
+                    for corr in bundle.statistical.significant_correlations
+                ]
+                graph = net_analyzer.build_correlation_network(
+                    correlations_data, min_correlation=min_correlation
+                )
+                net_result = net_analyzer.analyze_metric_network(graph)
+                _display_network_analysis(net_result)
+
+        if generate_hypothesis:
+            _console.print(
+                f"\n[bold cyan]Generating hypotheses ({hypothesis_method})...[/bold cyan]"
+            )
+            hyp_gen = HypothesisGeneratorModule(
+                method=hypothesis_method, num_hypotheses=num_hypotheses
+            )
+
+            metric_scores = {}
+            for metric_name, stats in analysis.metrics_summary.items():
+                metric_scores[metric_name] = stats.mean
+
+            low_performers_data = [
+                {
+                    "question": lp.test_case_id,
+                    "metric_name": lp.metric_name,
+                }
+                for lp in (analysis.low_performers or [])
+            ]
+
+            hypotheses = hyp_gen.generate_simple_hypotheses(
+                run_id, metric_scores, low_performers_data
+            )
+            _display_hypothesis_generation(hypotheses, hypothesis_method)
+
         if output:
             _export_analysis_json(analysis, output, bundle.nlp if nlp else None, improvement_report)
             _console.print(f"\n[green]분석 결과 내보냄: {output}[/green]")
@@ -192,6 +320,9 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
         """두 실행을 통계적으로 비교합니다."""
 
         resolved_db_path = db_path or Settings().evalvault_db_path
+        if resolved_db_path is None:
+            _console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
+            raise typer.Exit(1)
         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
 
         try:
@@ -220,7 +351,15 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
             _console.print(f"  Phoenix 트레이스: {trace_b}")
         _console.print(f"  검정: {test}\n")
 
-        comparisons = service.compare_runs(run_a, run_b, metrics=metric_list, test_type=test)
+        if test == "t-test":
+            test_type = "t-test"
+        elif test == "mann-whitney":
+            test_type = "mann-whitney"
+        else:
+            _console.print(f"[red]Error: Unsupported test type: {test}[/red]")
+            raise typer.Exit(1)
+
+        comparisons = service.compare_runs(run_a, run_b, metrics=metric_list, test_type=test_type)
 
         if not comparisons:
             _console.print("[yellow]비교할 공통 메트릭이 없습니다.[/yellow]")
@@ -942,6 +1081,123 @@ def _generate_report(
         file.write(content)
 
 
+def _display_anomaly_detection(anomaly_result) -> None:
+    _console.print("\n[bold]Anomaly Detection Results[/bold]")
+    _console.print(f"Detection method: {anomaly_result.detection_method}")
+    _console.print(f"Threshold: {anomaly_result.threshold:.2f}")
+    _console.print(f"Total runs: {anomaly_result.total_runs}")
+
+    if anomaly_result.anomalies:
+        detected = [a for a in anomaly_result.anomalies if a.is_anomaly]
+        if detected:
+            _console.print(f"\n[red]Detected {len(detected)} anomalies:[/red]")
+            table = Table(show_header=True, header_style="bold cyan")
+            table.add_column("Run ID")
+            table.add_column("Score", justify="right")
+            table.add_column("Pass Rate", justify="right")
+            table.add_column("Severity")
+
+            for anomaly in detected[:10]:
+                severity_color = (
+                    "red"
+                    if anomaly.severity == "high"
+                    else "yellow"
+                    if anomaly.severity == "medium"
+                    else "green"
+                )
+                table.add_row(
+                    anomaly.run_id[:12] + "...",
+                    f"{anomaly.anomaly_score:.2f}",
+                    f"{anomaly.pass_rate:.1%}",
+                    f"[{severity_color}]{anomaly.severity}[/{severity_color}]",
+                )
+            _console.print(table)
+        else:
+            _console.print("[green]No anomalies detected.[/green]")
+
+    if anomaly_result.insights:
+        _console.print("\n[bold]Insights:[/bold]")
+        for insight in anomaly_result.insights:
+            _console.print(f"  • {insight}")
+
+
+def _display_forecast_result(forecast_result) -> None:
+    _console.print("\n[bold]Forecast Results[/bold]")
+    _console.print(f"Method: {forecast_result.method}")
+    _console.print(f"Horizon: {forecast_result.horizon} runs")
+
+    if forecast_result.predicted_values:
+        _console.print("\n[bold]Predicted Pass Rates:[/bold]")
+        table = Table(show_header=True, header_style="bold cyan")
+        table.add_column("Run")
+        table.add_column("Predicted", justify="right")
+
+        for i, value in enumerate(forecast_result.predicted_values, 1):
+            table.add_row(f"+{i}", f"{value:.1%}")
+        _console.print(table)
+
+        avg_forecast = sum(forecast_result.predicted_values) / len(forecast_result.predicted_values)
+        _console.print(f"\nAverage forecast: {avg_forecast:.1%}")
+
+
+def _display_network_analysis(net_result) -> None:
+    _console.print("\n[bold]Network Analysis Results[/bold]")
+    _console.print(f"Nodes (metrics): {net_result.node_count}")
+    _console.print(f"Edges (correlations): {net_result.edge_count}")
+    _console.print(f"Density: {net_result.density:.3f}")
+    _console.print(f"Avg clustering: {net_result.avg_clustering:.3f}")
+
+    if net_result.communities:
+        _console.print(f"\n[bold]Communities ({len(net_result.communities)}):[/bold]")
+        for i, community in enumerate(net_result.communities):
+            if len(community) > 1:
+                _console.print(f"  Community {i + 1}: {', '.join(community)}")
+
+    if net_result.hub_metrics:
+        _console.print("\n[bold]Hub Metrics:[/bold]")
+        for metric in net_result.hub_metrics:
+            _console.print(f"  • {metric}")
+
+    if net_result.insights:
+        _console.print("\n[bold]Insights:[/bold]")
+        for insight in net_result.insights:
+            _console.print(f"  • {insight}")
+
+
+def _display_hypothesis_generation(hypotheses, method: str) -> None:
+    _console.print("\n[bold]Hypothesis Generation Results[/bold]")
+    _console.print(f"Method: {method}")
+    _console.print(f"Total hypotheses: {len(hypotheses)}")
+
+    if hypotheses:
+        _console.print("\n[bold]Generated Hypotheses:[/bold]")
+        table = Table(show_header=True, header_style="bold cyan")
+        table.add_column("#")
+        table.add_column("Hypothesis")
+        table.add_column("Metric")
+        table.add_column("Confidence", justify="right")
+        table.add_column("Evidence")
+
+        for i, hyp in enumerate(hypotheses[:10], 1):
+            confidence_color = (
+                "green" if hyp.confidence >= 0.8 else "yellow" if hyp.confidence >= 0.6 else "red"
+            )
+            table.add_row(
+                str(i),
+                hyp.text[:60] + "..." if len(hyp.text) > 60 else hyp.text,
+                hyp.metric_name or "-",
+                f"[{confidence_color}]{hyp.confidence:.2f}[/{confidence_color}]",
+                hyp.evidence[:30] + "..." if len(hyp.evidence) > 30 else hyp.evidence,
+            )
+        _console.print(table)
+
+        high_conf = [h for h in hypotheses if h.confidence >= 0.8]
+        if high_conf:
+            _console.print(
+                f"\n[green]High confidence hypotheses: {len(high_conf)}/{len(hypotheses)}[/green]"
+            )
+
+
 __all__ = [
     "register_analyze_commands",
     "_perform_playbook_analysis",
evalvault/adapters/inbound/cli/commands/pipeline.py

@@ -36,7 +36,7 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
             "-o",
             help="Output file for results (JSON format).",
         ),
-        db_path: Path = db_option(help_text="Path to database file."),
+        db_path: Path | None = db_option(help_text="Path to database file."),
     ) -> None:
         """Analyze evaluation results using natural language query."""
         from evalvault.adapters.outbound.analysis.pipeline_factory import (
@@ -52,6 +52,10 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
         if settings.phoenix_enabled:
             ensure_phoenix_instrumentation(settings, console=console)
 
+        if db_path is None:
+            console.print("[red]Error: Database path is not configured.[/red]")
+            raise typer.Exit(1)
+
         storage = SQLiteStorageAdapter(db_path=db_path)
         llm_adapter = None
         try:
evalvault/adapters/inbound/cli/commands/run.py

@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import os
 from collections.abc import Callable, Sequence
 from datetime import date, datetime
 from pathlib import Path
@@ -794,6 +795,12 @@ def register_run_commands(
             )
             raise typer.Exit(1)
 
+        ollama_env_url = os.environ.get("OLLAMA_BASE_URL")
+        if ollama_env_url:
+            normalized_url = ollama_env_url.strip()
+            if normalized_url and "://" not in normalized_url:
+                os.environ["OLLAMA_BASE_URL"] = f"http://{normalized_url}"
+
         settings = Settings()
 
         # Apply profile (CLI > .env > default)
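This hunk and the settings normalization that follows apply the same rule: a bare `host:port` gets an `http://` prefix, and an empty value falls back to the default Ollama endpoint. A standalone sketch of that rule (this mirrors the inline logic; the function name is illustrative):

```python
# Sketch of the URL-normalization rule used in both hunks.
DEFAULT_OLLAMA_URL = "http://localhost:11434"  # default named in the diff's fix hints


def normalize_base_url(raw: str | None) -> str:
    """Return a usable base URL: default when empty, scheme added when missing."""
    value = (raw or "").strip()
    if not value:
        return DEFAULT_OLLAMA_URL
    if "://" not in value:
        return f"http://{value}"
    return value


assert normalize_base_url(None) == "http://localhost:11434"
assert normalize_base_url("myhost:11434") == "http://myhost:11434"
assert normalize_base_url("https://ollama.internal") == "https://ollama.internal"
```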
@@ -915,6 +922,15 @@ def register_run_commands(
             raise typer.Exit(1) from exc
 
         if settings.llm_provider == "ollama":
+            base_url = getattr(settings, "ollama_base_url", "")
+            if not isinstance(base_url, str):
+                base_url = ""
+            base_url = base_url.strip()
+            if not base_url:
+                base_url = "http://localhost:11434"
+            elif "://" not in base_url:
+                base_url = f"http://{base_url}"
+            settings.ollama_base_url = base_url
             display_model = f"ollama/{settings.ollama_model}"
         elif settings.llm_provider == "vllm":
             display_model = f"vllm/{settings.vllm_model}"
@@ -1421,37 +1437,55 @@ def register_run_commands(
             ensure_phoenix_instrumentation(settings, console=console, force=True)
 
         evaluator = RagasEvaluator()
+        llm_adapter = None
         try:
             llm_adapter = get_llm_adapter(settings)
         except Exception as exc:
             provider = str(getattr(settings, "llm_provider", "")).strip().lower()
-            fixes: list[str] = []
-            if provider == "ollama":
-                fixes = [
-                    "Ollama 서버가 실행 중인지 확인하세요 (기본: http://localhost:11434).",
-                    "필요 모델을 받아두세요: `ollama pull gemma3:1b` 및 `ollama pull qwen3-embedding:0.6b`.",
-                    "URL을 바꿨다면 .env의 `OLLAMA_BASE_URL`을 확인하세요.",
-                ]
-            elif provider == "openai":
-                fixes = [
-                    "`.env`에 `OPENAI_API_KEY`를 설정하세요.",
-                    "프록시/네트워크가 필요한 환경이면 연결 가능 여부를 확인하세요.",
-                ]
-            elif provider == "vllm":
-                fixes = [
-                    "`.env`의 `VLLM_BASE_URL`/`VLLM_MODEL` 설정을 확인하세요.",
-                    "vLLM 서버가 OpenAI 호환 API로 실행 중인지 확인하세요.",
-                ]
-            else:
-                fixes = ["--profile 또는 환경변수 설정을 확인하세요."]
+            recovered = False
+            if provider == "ollama" and "http://" in str(exc):
+                base_url = getattr(settings, "ollama_base_url", "")
+                if not isinstance(base_url, str) or not base_url.strip():
+                    base_url = "http://localhost:11434"
+                elif "://" not in base_url:
+                    base_url = f"http://{base_url.strip()}"
+                settings.ollama_base_url = base_url
+                try:
+                    llm_adapter = get_llm_adapter(settings)
+                    recovered = True
+                except Exception as retry_exc:
+                    exc = retry_exc
+
+            if not recovered:
+                fixes: list[str] = []
+                if provider == "ollama":
+                    fixes = [
+                        "Ollama 서버가 실행 중인지 확인하세요 (기본: http://localhost:11434).",
+                        "필요 모델을 받아두세요: `ollama pull gemma3:1b` 및 `ollama pull qwen3-embedding:0.6b`.",
+                        "URL을 바꿨다면 .env의 `OLLAMA_BASE_URL`을 확인하세요.",
+                    ]
+                elif provider == "openai":
+                    fixes = [
+                        "`.env`에 `OPENAI_API_KEY`를 설정하세요.",
+                        "프록시/네트워크가 필요한 환경이면 연결 가능 여부를 확인하세요.",
+                    ]
+                elif provider == "vllm":
+                    fixes = [
+                        "`.env`의 `VLLM_BASE_URL`/`VLLM_MODEL` 설정을 확인하세요.",
+                        "vLLM 서버가 OpenAI 호환 API로 실행 중인지 확인하세요.",
+                    ]
+                else:
+                    fixes = ["--profile 또는 환경변수 설정을 확인하세요."]
 
-            print_cli_error(
-                console,
-                "LLM/임베딩 어댑터를 초기화하지 못했습니다.",
-                details=str(exc),
-                fixes=fixes,
-            )
-            raise typer.Exit(1) from exc
+                print_cli_error(
+                    console,
+                    "LLM/임베딩 어댑터를 초기화하지 못했습니다.",
+                    details=str(exc),
+                    fixes=fixes,
+                )
+                raise typer.Exit(1) from exc
+
+        assert llm_adapter is not None
 
         memory_adapter: SQLiteDomainMemoryAdapter | None = None
         memory_evaluator: MemoryAwareEvaluator | None = None
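The rewrite above turns a report-and-exit handler into a recover-once handler: on failure it normalizes the Ollama base URL, retries adapter creation a single time, and only falls back to the error report (with provider-specific fix hints) if the retry also fails. A simplified sketch of that shape; the real code additionally gates the retry on the provider and the error text, and all names here are hypothetical:

```python
# Simplified sketch of the recover-once pattern (hypothetical names).
from collections.abc import Callable


def init_with_recovery(
    build: Callable[[], object],
    normalize: Callable[[], None],
    report: Callable[[Exception], None],
) -> object:
    try:
        return build()
    except Exception:
        normalize()              # e.g. prefix a bare host:port with http://
        try:
            return build()       # one retry with corrected settings
        except Exception as retry_exc:
            report(retry_exc)    # surface the final error with fix hints
            raise
```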
evalvault/adapters/inbound/cli/utils/analysis_io.py

@@ -187,9 +187,9 @@ def build_metric_scorecard(
         threshold = _resolve_threshold(run, metric)
         pass_rate = pass_rates.get(metric) if isinstance(pass_rates, dict) else None
         status = "unknown"
-        if isinstance(mean, (int, float)):
+        if isinstance(mean, int | float):
             status = "pass" if float(mean) >= threshold else "risk"
-        elif isinstance(pass_rate, (int, float)):
+        elif isinstance(pass_rate, int | float):
             status = "pass" if float(pass_rate) >= 0.7 else "risk"
         scorecard.append(
             {
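`isinstance(mean, int | float)` is the PEP 604 union syntax, which `isinstance` accepts since Python 3.10; it is behaviorally equivalent to the tuple form it replaces:

```python
# Equivalent checks; the union form requires Python 3.10+.
x = 3.14
assert isinstance(x, (int, float))   # tuple form (works on all versions)
assert isinstance(x, int | float)    # PEP 604 union form (3.10+)
```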
evalvault/adapters/outbound/analysis/__init__.py

@@ -42,6 +42,9 @@ from evalvault.adapters.outbound.analysis.hybrid_rrf_module import HybridRRFModule
 from evalvault.adapters.outbound.analysis.hybrid_weighted_module import (
     HybridWeightedModule,
 )
+from evalvault.adapters.outbound.analysis.hypothesis_generator_module import (
+    HypothesisGeneratorModule,
+)
 from evalvault.adapters.outbound.analysis.llm_report_module import LLMReportModule
 from evalvault.adapters.outbound.analysis.low_performer_extractor_module import (
     LowPerformerExtractorModule,
@@ -53,6 +56,9 @@ from evalvault.adapters.outbound.analysis.morpheme_analyzer_module import (
 from evalvault.adapters.outbound.analysis.morpheme_quality_checker_module import (
     MorphemeQualityCheckerModule,
 )
+from evalvault.adapters.outbound.analysis.network_analyzer_module import (
+    NetworkAnalyzerModule,
+)
 from evalvault.adapters.outbound.analysis.nlp_adapter import NLPAnalysisAdapter
 from evalvault.adapters.outbound.analysis.nlp_analyzer_module import NLPAnalyzerModule
 from evalvault.adapters.outbound.analysis.pattern_detector_module import (
@@ -103,6 +109,9 @@ from evalvault.adapters.outbound.analysis.summary_report_module import (
 from evalvault.adapters.outbound.analysis.time_series_analyzer_module import (
     TimeSeriesAnalyzerModule,
 )
+from evalvault.adapters.outbound.analysis.timeseries_advanced_module import (
+    TimeSeriesAdvancedModule,
+)
 from evalvault.adapters.outbound.analysis.trend_detector_module import (
     TrendDetectorModule,
 )
@@ -111,16 +120,16 @@ from evalvault.adapters.outbound.analysis.verification_report_module import (
 )
 
 __all__ = [
-    # Phase 2-3
-    "CausalAnalysisAdapter",
+    "TimeSeriesAdvancedModule",
+    "NetworkAnalyzerModule",
     "NLPAnalysisAdapter",
     "StatisticalAnalysisAdapter",
     "BaseAnalysisAdapter",
     "AnalysisDataProcessor",
-    # Phase 14
     "BaseAnalysisModule",
     "AnalysisReportModule",
     "BM25SearcherModule",
+    "CausalAnalysisAdapter",
     "CausalAnalyzerModule",
     "ComparisonReportModule",
     "DataLoaderModule",
@@ -131,6 +140,7 @@ __all__ = [
     "EmbeddingSearcherModule",
     "HybridRRFModule",
     "HybridWeightedModule",
+    "HypothesisGeneratorModule",
     "LowPerformerExtractorModule",
     "LLMReportModule",
     "ModelAnalyzerModule",
evalvault/adapters/outbound/analysis/embedding_analyzer_module.py

@@ -8,7 +8,6 @@ import numpy as np
 
 from evalvault.adapters.outbound.analysis.base_module import BaseAnalysisModule
 from evalvault.adapters.outbound.analysis.pipeline_helpers import get_upstream_output
-from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
 from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
 from evalvault.config.settings import Settings
 from evalvault.domain.entities import EvaluationRun

@@ -140,6 +139,8 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
 
         if backend_hint == "ollama" or embedding_profile in {"dev", "prod"}:
             try:
+                from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
+
                 adapter = OllamaAdapter(settings)
                 retriever = KoreanDenseRetriever(
                     model_name=model_name or settings.ollama_embedding_model,
evalvault/adapters/outbound/analysis/embedding_searcher_module.py

@@ -12,7 +12,6 @@ from evalvault.adapters.outbound.analysis.pipeline_helpers import (
     recall_at_k,
     safe_mean,
 )
-from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
 from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
 from evalvault.config.settings import Settings
 from evalvault.domain.entities import EvaluationRun

@@ -66,6 +65,8 @@ class EmbeddingSearcherModule(BaseAnalysisModule):
 
         if embedding_profile in {"dev", "prod"}:
             try:
+                from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
+
                 adapter = OllamaAdapter(settings)
                 retriever = KoreanDenseRetriever(
                     model_name=settings.ollama_embedding_model,
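Both embedding modules move the `OllamaAdapter` import from module scope into the `try` block where it is actually used, so merely importing the analysis package no longer pulls in the Ollama client, and an import failure is handled by the surrounding try/except like any other backend error. The general shape of the pattern, sketched with hypothetical names:

```python
# Sketch of the deferred-import pattern (hypothetical module and names).
def build_retriever(settings):
    try:
        # Imported only when this code path runs, so the optional
        # dependency is not required just to import the package.
        from heavy_optional_backend import BackendClient  # hypothetical

        return BackendClient(settings)
    except Exception:
        return None  # caller falls back to another backend
```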