evalvault 1.75.0__py3-none-any.whl → 1.77.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -24,7 +24,7 @@ from evalvault.adapters.outbound.judge_calibration_reporter import JudgeCalibrat
  from evalvault.adapters.outbound.ops.report_renderer import render_json, render_markdown
  from evalvault.adapters.outbound.report import MarkdownReportAdapter
  from evalvault.config.phoenix_support import PhoenixExperimentResolver
- from evalvault.config.settings import Settings
+ from evalvault.config.settings import Settings, resolve_tracker_providers
  from evalvault.domain.entities import (
      CalibrationResult,
      FeedbackSummary,
@@ -217,56 +217,83 @@ class WebUIAdapter:
              logger.warning(f"Failed to create LLM adapter for {model_id}: {e}, using default")
          return self._llm_adapter

-     def _get_tracker(
+     def _get_trackers(
          self,
          settings: Settings,
          tracker_config: dict[str, Any] | None,
-     ) -> tuple[str | None, Any | None]:
-         provider = (tracker_config or {}).get("provider") or "none"
-         provider = provider.lower()
-
-         if provider in {"none", ""}:
-             return None, None
-
-         if provider == "langfuse":
-             if not settings.langfuse_public_key or not settings.langfuse_secret_key:
-                 logger.warning("Langfuse credentials missing; skipping tracker logging.")
-                 return None, None
-             from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
-
-             return provider, LangfuseAdapter(
-                 public_key=settings.langfuse_public_key,
-                 secret_key=settings.langfuse_secret_key,
-                 host=settings.langfuse_host,
-             )
+     ) -> list[tuple[str, Any]]:
+         provider = (tracker_config or {}).get("provider") or settings.tracker_provider or "none"
+         providers = resolve_tracker_providers(provider)
+         if not providers or providers == ["none"]:
+             return []
+         required = {"mlflow", "phoenix"}
+         if not required.issubset(set(providers)):
+             raise RuntimeError("Tracker must include both mlflow and phoenix")
+
+         trackers: list[tuple[str, Any]] = []
+         for entry in providers:
+             if entry == "langfuse":
+                 if not settings.langfuse_public_key or not settings.langfuse_secret_key:
+                     raise RuntimeError("Langfuse credentials missing")
+                 from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
+
+                 trackers.append(
+                     (
+                         entry,
+                         LangfuseAdapter(
+                             public_key=settings.langfuse_public_key,
+                             secret_key=settings.langfuse_secret_key,
+                             host=settings.langfuse_host,
+                         ),
+                     )
+                 )
+                 continue

-         if provider == "phoenix":
-             from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
+             if entry == "phoenix":
+                 from evalvault.config.phoenix_support import ensure_phoenix_instrumentation

-             ensure_phoenix_instrumentation(settings, force=True)
-             try:
-                 from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
-             except ImportError as exc:
-                 logger.warning("Phoenix extras not installed: %s", exc)
-                 return None, None
-             return provider, PhoenixAdapter(endpoint=settings.phoenix_endpoint)
-
-         if provider == "mlflow":
-             if not settings.mlflow_tracking_uri:
-                 logger.warning("MLflow tracking URI missing; skipping tracker logging.")
-                 return None, None
-             try:
-                 from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
-             except ImportError as exc:
-                 logger.warning("MLflow adapter unavailable: %s", exc)
-                 return None, None
-             return provider, MLflowAdapter(
-                 tracking_uri=settings.mlflow_tracking_uri,
-                 experiment_name=settings.mlflow_experiment_name,
-             )
+                 ensure_phoenix_instrumentation(settings, force=True)
+                 try:
+                     from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
+                 except ImportError as exc:
+                     raise RuntimeError("Phoenix extras not installed") from exc
+                 trackers.append(
+                     (
+                         entry,
+                         PhoenixAdapter(
+                             endpoint=settings.phoenix_endpoint,
+                             project_name=getattr(settings, "phoenix_project_name", None),
+                             annotations_enabled=getattr(
+                                 settings,
+                                 "phoenix_annotations_enabled",
+                                 True,
+                             ),
+                         ),
+                     )
+                 )
+                 continue

-         logger.warning("Unknown tracker provider: %s", provider)
-         return None, None
+             if entry == "mlflow":
+                 if not settings.mlflow_tracking_uri:
+                     raise RuntimeError("MLflow tracking URI missing")
+                 try:
+                     from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
+                 except ImportError as exc:
+                     raise RuntimeError("MLflow adapter unavailable") from exc
+                 trackers.append(
+                     (
+                         entry,
+                         MLflowAdapter(
+                             tracking_uri=settings.mlflow_tracking_uri,
+                             experiment_name=settings.mlflow_experiment_name,
+                         ),
+                     )
+                 )
+                 continue
+
+             raise RuntimeError(f"Unknown tracker provider: {entry}")
+
+         return trackers

      @staticmethod
      def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
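
Note: `_get_trackers` now returns every configured `(provider, adapter)` pair instead of a single optional tracker, and configuration problems surface as `RuntimeError` rather than warnings. A minimal sketch of the shape callers consume is below; `FakeTracker` and the literal provider list are illustrative stand-ins, not part of the package.

```python
# Sketch only: FakeTracker stands in for the real Langfuse/MLflow/Phoenix adapters,
# which all expose log_evaluation_run(result) according to this diff.
from typing import Any


class FakeTracker:
    def log_evaluation_run(self, result: Any) -> str:
        return "trace-123"  # real adapters return a backend trace/run id


trackers: list[tuple[str, Any]] = [("mlflow", FakeTracker()), ("phoenix", FakeTracker())]

for provider, tracker in trackers:
    trace_id = tracker.log_evaluation_run(result={})  # callers pass the evaluation result
    print(f"{provider} logged trace {trace_id}")
```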
@@ -425,7 +452,11 @@ class WebUIAdapter:
          dataset.metadata["domain"] = requested_domain

          settings = self._settings or Settings()
-         tracker_provider, tracker = self._get_tracker(settings, request.tracker_config)
+         try:
+             trackers = self._get_trackers(settings, request.tracker_config)
+         except RuntimeError as exc:
+             raise RuntimeError(f"Tracker configuration error: {exc}") from exc
+         tracker_providers = [provider for provider, _ in trackers]
          stage_store = bool(request.stage_store)

          retriever_instance = None
@@ -484,7 +515,7 @@ class WebUIAdapter:
          )
          from evalvault.domain.services.memory_aware_evaluator import MemoryAwareEvaluator

-         tracer = PhoenixTracerAdapter() if tracker_provider == "phoenix" else None
+         tracer = PhoenixTracerAdapter() if "phoenix" in tracker_providers else None
          memory_adapter = build_domain_memory_adapter(
              settings=self._settings,
              db_path=Path(memory_db_path) if memory_db_path else None,
@@ -536,11 +567,31 @@ class WebUIAdapter:

          # 2. 진행률 초기화
          start_time = time.monotonic()
+         total_cases = len(dataset.test_cases)
+
+         def emit_progress(message: str, *, status: str = "running") -> None:
+             if not on_progress:
+                 return
+             elapsed = time.monotonic() - start_time
+             rate = (total_cases / elapsed) if total_cases > 0 and elapsed > 0 else None
+             on_progress(
+                 EvalProgress(
+                     current=total_cases,
+                     total=total_cases,
+                     current_metric=message,
+                     percent=100.0 if total_cases > 0 else 0.0,
+                     status=status,
+                     elapsed_seconds=elapsed,
+                     eta_seconds=0.0,
+                     rate=rate,
+                 )
+             )
+
          if on_progress:
              on_progress(
                  EvalProgress(
                      current=0,
-                     total=len(dataset.test_cases),
+                     total=total_cases,
                      current_metric="",
                      percent=0.0,
                      status="running",
@@ -696,24 +747,31 @@ class WebUIAdapter:
              str(request.threshold_profile).strip().lower()
          )

-         if tracker and tracker_provider:
-             try:
-                 trace_id = tracker.log_evaluation_run(result)
-                 if tracker_provider == "phoenix":
-                     endpoint = settings.phoenix_endpoint or "http://localhost:6006/v1/traces"
-                     phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
-                     phoenix_meta.update(
-                         {
-                             "trace_id": trace_id,
-                             "endpoint": endpoint,
-                             "trace_url": self._build_phoenix_trace_url(endpoint, trace_id),
-                             "schema_version": 2,
-                         }
-                     )
-             except Exception as exc:
-                 logger.warning("Tracker logging failed: %s", exc)
+         if trackers:
+             emit_progress("Logging trackers...", status="finalizing")
+             result.tracker_metadata.setdefault("tracker_providers", tracker_providers)
+             for provider, tracker in trackers:
+                 try:
+                     trace_id = tracker.log_evaluation_run(result)
+                     provider_meta = result.tracker_metadata.setdefault(provider, {})
+                     if isinstance(provider_meta, dict):
+                         provider_meta.setdefault("trace_id", trace_id)
+                     if provider == "phoenix":
+                         endpoint = settings.phoenix_endpoint or "http://localhost:6006/v1/traces"
+                         phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
+                         phoenix_meta.update(
+                             {
+                                 "trace_id": trace_id,
+                                 "endpoint": endpoint,
+                                 "trace_url": self._build_phoenix_trace_url(endpoint, trace_id),
+                                 "schema_version": 2,
+                             }
+                         )
+                 except Exception as exc:
+                     raise RuntimeError(f"Tracker logging failed for {provider}: {exc}") from exc

          if stage_store and self._storage and hasattr(self._storage, "save_stage_events"):
+             emit_progress("Storing stage events...", status="finalizing")
              try:
                  prompt_metadata_entries = self._build_prompt_metadata_entries(prompt_bundle)
                  stage_event_builder = StageEventBuilder()
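
Note: tracker logging now loops over every configured tracker, records a per-provider `trace_id` under `result.tracker_metadata`, and turns logging failures into `RuntimeError` instead of a warning. An illustrative metadata shape after logging both MLflow and Phoenix is shown below; the keys follow the diff, while the id and URL values are invented.

```python
# Keys follow the diff; the values are made up for illustration, and the trace_url
# format ultimately depends on _build_phoenix_trace_url.
tracker_metadata = {
    "tracker_providers": ["mlflow", "phoenix"],
    "mlflow": {"trace_id": "run-abc123"},
    "phoenix": {
        "trace_id": "trace-def456",
        "endpoint": "http://localhost:6006/v1/traces",
        "trace_url": "http://localhost:6006/...",  # built by _build_phoenix_trace_url
        "schema_version": 2,
    },
}
```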
@@ -755,6 +813,7 @@ class WebUIAdapter:

          # 5. 결과 저장
          if self._storage:
+             emit_progress("Saving evaluation run...", status="finalizing")
              logger.info(f"Saving evaluation run: {result.run_id}")
              if prompt_bundle:
                  self._storage.save_prompt_set(prompt_bundle)
@@ -15,6 +15,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
  from starlette.responses import JSONResponse

  from evalvault.adapters.inbound.api.adapter import WebUIAdapter, create_adapter
+ from evalvault.config.runtime_services import ensure_local_observability
  from evalvault.config.settings import Settings, get_settings, is_production_profile

  logger = logging.getLogger(__name__)
@@ -63,6 +64,7 @@ async def lifespan(app: FastAPI):
      # Startup: Initialize adapter
      adapter = create_adapter()
      app.state.adapter = adapter
+     ensure_local_observability(get_settings())
      try:
          from evalvault.adapters.inbound.api.routers.chat import warm_rag_index

@@ -71,7 +71,9 @@ class ConfigUpdateRequest(BaseModel):
      phoenix_endpoint: str | None = None
      phoenix_enabled: bool | None = None
      phoenix_sample_rate: float | None = None
-     tracker_provider: Literal["langfuse", "mlflow", "phoenix", "none"] | None = None
+     phoenix_project_name: str | None = None
+     phoenix_annotations_enabled: bool | None = None
+     tracker_provider: str | None = None
      postgres_host: str | None = None
      postgres_port: int | None = None
      postgres_database: str | None = None
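
Note: `tracker_provider` is loosened from a `Literal` of single providers to a plain string, presumably so that multi-provider values can pass request validation and be expanded by `resolve_tracker_providers`; the accepted syntax is not shown in this diff. A minimal sketch of the updated request model (the comma-separated example value is an assumption, not confirmed here):

```python
# Sketch of the relaxed schema; only the fields touched by this hunk are shown.
from pydantic import BaseModel


class ConfigUpdateRequest(BaseModel):
    phoenix_project_name: str | None = None
    phoenix_annotations_enabled: bool | None = None
    tracker_provider: str | None = None  # was Literal["langfuse", "mlflow", "phoenix", "none"]


# Hypothetical multi-provider value; the real parsing rules live in resolve_tracker_providers.
req = ConfigUpdateRequest(tracker_provider="mlflow,phoenix")
```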
@@ -14,6 +14,8 @@ import typer
  from rich import print as rprint
  from rich.console import Console

+ from evalvault.config.runtime_services import ensure_local_observability
+ from evalvault.config.settings import get_settings
  from evalvault.domain.metrics.registry import list_metric_names

  from .commands import attach_sub_apps, register_all_commands
@@ -61,6 +63,7 @@ def main(
      ),
  ) -> None:
      """EvalVault - RAG evaluation system."""
+     ensure_local_observability(get_settings())


  if __name__ == "__main__":  # pragma: no cover
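
Note: both the API lifespan (above) and the CLI entrypoint now call `ensure_local_observability(get_settings())` once at startup. Its implementation is not part of this diff; the sketch below only shows the placement, with a no-op stand-in for the real helper from `evalvault.config.runtime_services`.

```python
# Placement sketch only; the stand-in body does nothing, unlike the real helper.
from contextlib import asynccontextmanager

from fastapi import FastAPI


def ensure_local_observability(settings: object) -> None:
    """Stand-in: assumed safe to call on every startup."""


@asynccontextmanager
async def lifespan(app: FastAPI):
    ensure_local_observability(object())  # runs once before the app starts serving
    yield
```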
@@ -358,6 +358,11 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
          profile: str | None = profile_option(
              help_text="비교 리포트용 LLM 프로필 (dev, prod, openai)",
          ),
+         use_llm_report: bool = typer.Option(
+             True,
+             "--use-llm-report/--no-llm-report",
+             help="LLM 보고서 사용 여부",
+         ),
      ) -> None:
          """두 실행을 통계적으로 비교합니다."""

@@ -461,7 +466,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
              compare_metrics=metric_list,
              test_type=test,
              report_type="comparison",
-             use_llm_report=True,
+             use_llm_report=use_llm_report,
          )

          artifacts_dir = resolve_artifact_dir(
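
Note: the comparison command now exposes the previously hard-coded `use_llm_report=True` as a `--use-llm-report/--no-llm-report` flag (default on), so the LLM-written report section can be skipped. Below is a minimal Typer sketch of that boolean-flag pattern; the surrounding `compare` command is a stand-in, only the option wiring mirrors the diff.

```python
# Stand-alone Typer sketch of the --use-llm-report/--no-llm-report flag pattern.
import typer

app = typer.Typer()


@app.command()
def compare(
    use_llm_report: bool = typer.Option(
        True,
        "--use-llm-report/--no-llm-report",
        help="Whether to generate the LLM-written comparison report",
    ),
) -> None:
    typer.echo(f"use_llm_report={use_llm_report}")


if __name__ == "__main__":
    app()
```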
@@ -31,7 +31,7 @@ from ..utils.validators import parse_csv_option, validate_choices
  from .run_helpers import (
      _display_results,
      _is_oss_open_model,
-     _log_to_tracker,
+     _log_to_trackers,
      _resolve_thresholds,
      _save_results,
      _save_to_db,
@@ -419,12 +419,12 @@ def create_method_app(console: Console) -> typer.Typer:
          _display_results(result, console)

          if tracker and tracker != "none":
-             _log_to_tracker(settings, result, console, tracker_type=tracker)
+             _log_to_trackers(settings, result, console, tracker_type=tracker)

          if eval_output:
              _save_results(eval_output, result, console)

-         _save_to_db(db_path, result, console)
+         _save_to_db(settings, db_path, result, console)

      return method_app