evalvault 1.75.0__py3-none-any.whl → 1.77.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import json
6
6
  from collections.abc import Callable, Sequence
7
7
  from dataclasses import asdict, dataclass
8
8
  from pathlib import Path
9
- from typing import Any, Literal
9
+ from typing import Any
10
10
 
11
11
  import click
12
12
  import typer
@@ -25,7 +25,7 @@ from evalvault.config.phoenix_support import (
25
25
  instrumentation_span,
26
26
  set_span_attributes,
27
27
  )
28
- from evalvault.config.settings import Settings
28
+ from evalvault.config.settings import Settings, resolve_tracker_providers
29
29
  from evalvault.domain.entities import (
30
30
  Dataset,
31
31
  EvaluationRun,
@@ -58,7 +58,7 @@ from evalvault.ports.outbound.tracker_port import TrackerPort
58
58
  from ..utils.console import print_cli_error, print_cli_warning
59
59
  from ..utils.formatters import format_score, format_status
60
60
 
61
- TrackerType = Literal["langfuse", "mlflow", "phoenix", "none"]
61
+ TrackerType = str
62
62
  apply_retriever_to_dataset = retriever_context.apply_retriever_to_dataset
63
63
 
64
64
 
@@ -319,15 +319,22 @@ def _display_memory_insights(insights: dict[str, Any], console: Console) -> None
319
319
  console.print(Panel(panel_body, title="Domain Memory Insights", border_style="magenta"))
320
320
 
321
321
 
322
- def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> TrackerPort | None:
322
+ def _get_tracker(
323
+ settings: Settings,
324
+ tracker_type: str,
325
+ console: Console,
326
+ *,
327
+ required: bool = False,
328
+ ) -> TrackerPort | None:
323
329
  """Get the appropriate tracker adapter based on type."""
324
330
  if tracker_type == "langfuse":
325
331
  if not settings.langfuse_public_key or not settings.langfuse_secret_key:
326
- print_cli_warning(
327
- console,
328
- "Langfuse 자격 증명이 설정되지 않아 로깅을 건너뜁니다.",
329
- tips=["LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY를 .env에 추가하세요."],
330
- )
332
+ message = "Langfuse 자격 증명이 설정되지 않았습니다."
333
+ tips = ["LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY를 .env에 추가하세요."]
334
+ if required:
335
+ print_cli_error(console, message, fixes=tips)
336
+ raise typer.Exit(2)
337
+ print_cli_warning(console, message + " 로깅을 건너뜁니다.", tips=tips)
331
338
  return None
332
339
  from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
333
340
 
@@ -338,42 +345,48 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
338
345
  )
339
346
 
340
347
  elif tracker_type == "mlflow":
341
- if not settings.mlflow_tracking_uri:
342
- print_cli_warning(
343
- console,
344
- "MLflow tracking URI가 설정되지 않아 로깅을 건너뜁니다.",
345
- tips=["MLFLOW_TRACKING_URI 환경 변수를 설정하세요."],
346
- )
347
- return None
348
+ tracking_uri = getattr(settings, "mlflow_tracking_uri", None)
349
+ if not isinstance(tracking_uri, str) or not tracking_uri.strip():
350
+ tracking_uri = f"sqlite:///{Path.cwd() / 'mlruns.db'}"
351
+ experiment_name = getattr(settings, "mlflow_experiment_name", None)
352
+ if not isinstance(experiment_name, str) or not experiment_name.strip():
353
+ experiment_name = "evalvault"
348
354
  try:
349
355
  from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
350
356
 
351
357
  return MLflowAdapter(
352
- tracking_uri=settings.mlflow_tracking_uri,
353
- experiment_name=settings.mlflow_experiment_name,
358
+ tracking_uri=tracking_uri,
359
+ experiment_name=experiment_name,
354
360
  )
355
361
  except ImportError:
356
- print_cli_warning(
357
- console,
358
- "MLflow extra가 설치되지 않았습니다.",
359
- tips=["uv sync --extra mlflow 명령으로 구성요소를 설치하세요."],
360
- )
362
+ message = "MLflow extra가 설치되지 않았습니다."
363
+ tips = ["uv sync --extra mlflow 명령으로 구성요소를 설치하세요."]
364
+ if required:
365
+ print_cli_error(console, message, fixes=tips)
366
+ raise typer.Exit(2)
367
+ print_cli_warning(console, message, tips=tips)
361
368
  return None
362
369
 
363
370
  elif tracker_type == "phoenix":
364
371
  try:
365
372
  from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
366
373
 
374
+ endpoint = getattr(settings, "phoenix_endpoint", None)
375
+ if not isinstance(endpoint, str) or not endpoint.strip():
376
+ endpoint = "http://localhost:6006/v1/traces"
367
377
  return PhoenixAdapter(
368
- endpoint=settings.phoenix_endpoint,
378
+ endpoint=endpoint,
369
379
  service_name="evalvault",
380
+ project_name=getattr(settings, "phoenix_project_name", None),
381
+ annotations_enabled=getattr(settings, "phoenix_annotations_enabled", True),
370
382
  )
371
383
  except ImportError:
372
- print_cli_warning(
373
- console,
374
- "Phoenix extra가 설치되지 않았습니다.",
375
- tips=["uv sync --extra phoenix 명령으로 의존성을 추가하세요."],
376
- )
384
+ message = "Phoenix extra가 설치되지 않았습니다."
385
+ tips = ["uv sync --extra phoenix 명령으로 의존성을 추가하세요."]
386
+ if required:
387
+ print_cli_error(console, message, fixes=tips)
388
+ raise typer.Exit(2)
389
+ print_cli_warning(console, message, tips=tips)
377
390
  return None
378
391
 
379
392
  else:
@@ -385,6 +398,19 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
385
398
  return None
386
399
 
387
400
 
401
+ def _resolve_tracker_list(tracker_type: str) -> list[str]:
402
+ providers = resolve_tracker_providers(tracker_type)
403
+ if not providers:
404
+ return []
405
+ if providers == ["none"]:
406
+ return ["none"]
407
+ supported = {"langfuse", "mlflow", "phoenix"}
408
+ unknown = [entry for entry in providers if entry not in supported]
409
+ if unknown:
410
+ raise ValueError(f"Unknown tracker provider(s): {', '.join(unknown)}")
411
+ return providers
412
+
413
+
388
414
  def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
389
415
  """Build a Phoenix UI URL for the given trace ID."""
390
416
 
@@ -395,7 +421,7 @@ def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
395
421
  return f"{base.rstrip('/')}/#/traces/{trace_id}"
396
422
 
397
423
 
398
- def _log_to_tracker(
424
+ def _log_to_trackers(
399
425
  settings: Settings,
400
426
  result,
401
427
  console: Console,
@@ -404,18 +430,39 @@ def _log_to_tracker(
404
430
  phoenix_options: dict[str, Any] | None = None,
405
431
  log_phoenix_traces_fn: Callable[..., int] | None = None,
406
432
  ) -> None:
407
- """Log evaluation results to the specified tracker."""
408
- tracker = _get_tracker(settings, tracker_type, console)
409
- if tracker is None:
433
+ """Log evaluation results to the specified tracker(s)."""
434
+ try:
435
+ tracker_types = _resolve_tracker_list(tracker_type)
436
+ except ValueError as exc:
437
+ print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
438
+ raise typer.Exit(2) from exc
439
+ if not tracker_types or tracker_types == ["none"]:
410
440
  return
411
441
 
412
- tracker_name = tracker_type.capitalize()
413
- trace_id: str | None = None
414
- with console.status(f"[bold green]Logging to {tracker_name}..."):
415
- try:
416
- trace_id = tracker.log_evaluation_run(result)
417
- console.print(f"[green]Logged to {tracker_name}[/green] (trace_id: {trace_id})")
418
- if trace_id and tracker_type == "phoenix":
442
+ result.tracker_metadata.setdefault("tracker_providers", tracker_types)
443
+ for provider in tracker_types:
444
+ tracker = _get_tracker(settings, provider, console, required=True)
445
+ if tracker is None:
446
+ raise typer.Exit(2)
447
+ tracker_name = provider.capitalize()
448
+ trace_id: str | None = None
449
+ with console.status(f"[bold green]Logging to {tracker_name}..."):
450
+ try:
451
+ trace_id = tracker.log_evaluation_run(result)
452
+ console.print(f"[green]Logged to {tracker_name}[/green] (trace_id: {trace_id})")
453
+ except Exception as exc:
454
+ print_cli_error(
455
+ console,
456
+ f"{tracker_name} 로깅에 실패했습니다.",
457
+ details=str(exc),
458
+ )
459
+ raise typer.Exit(2) from exc
460
+
461
+ if trace_id:
462
+ provider_meta = result.tracker_metadata.setdefault(provider, {})
463
+ if isinstance(provider_meta, dict):
464
+ provider_meta.setdefault("trace_id", trace_id)
465
+ if provider == "phoenix":
419
466
  endpoint = getattr(settings, "phoenix_endpoint", "http://localhost:6006/v1/traces")
420
467
  if not isinstance(endpoint, str) or not endpoint:
421
468
  endpoint = "http://localhost:6006/v1/traces"
@@ -431,30 +478,82 @@ def _log_to_tracker(
431
478
  trace_url = get_phoenix_trace_url(result.tracker_metadata)
432
479
  if trace_url:
433
480
  console.print(f"[dim]Phoenix Trace: {trace_url}[/dim]")
434
- except Exception as exc: # pragma: no cover - telemetry best-effort
435
- print_cli_warning(
436
- console,
437
- f"{tracker_name} 로깅에 실패했습니다.",
438
- tips=[str(exc)],
481
+
482
+ options = phoenix_options or {}
483
+ log_traces = log_phoenix_traces_fn or log_phoenix_traces
484
+ extra = log_traces(
485
+ tracker,
486
+ result,
487
+ max_traces=options.get("max_traces"),
488
+ metadata=options.get("metadata"),
489
+ )
490
+ if extra:
491
+ console.print(
492
+ f"[dim]Recorded {extra} Phoenix RAG trace(s) for detailed observability.[/dim]"
493
+ )
494
+
495
+
496
+ def _log_analysis_artifacts(
497
+ settings: Settings,
498
+ result: EvaluationRun,
499
+ console: Console,
500
+ tracker_type: str,
501
+ *,
502
+ analysis_payload: dict[str, Any],
503
+ artifact_index: dict[str, Any],
504
+ report_text: str,
505
+ output_path: Path,
506
+ report_path: Path,
507
+ ) -> None:
508
+ """Log analysis artifacts to tracker(s) as a separate trace/run."""
509
+ try:
510
+ tracker_types = _resolve_tracker_list(tracker_type)
511
+ except ValueError as exc:
512
+ print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
513
+ raise typer.Exit(2) from exc
514
+ if not tracker_types or tracker_types == ["none"]:
515
+ return
516
+
517
+ metadata = {
518
+ "run_id": result.run_id,
519
+ "dataset_name": result.dataset_name,
520
+ "dataset_version": result.dataset_version,
521
+ "analysis_output": str(output_path),
522
+ "analysis_report": str(report_path),
523
+ "analysis_artifacts_dir": artifact_index.get("dir"),
524
+ "event_type": "analysis",
525
+ }
526
+
527
+ for provider in tracker_types:
528
+ tracker = _get_tracker(settings, provider, console, required=True)
529
+ if tracker is None:
530
+ raise typer.Exit(2)
531
+ trace_name = f"analysis-{result.run_id[:8]}"
532
+ try:
533
+ trace_id = tracker.start_trace(trace_name, metadata=metadata)
534
+ tracker.save_artifact(
535
+ trace_id, "analysis_payload", analysis_payload, artifact_type="json"
439
536
  )
440
- return
441
-
442
- if tracker_type == "phoenix":
443
- options = phoenix_options or {}
444
- log_traces = log_phoenix_traces_fn or log_phoenix_traces
445
- extra = log_traces(
446
- tracker,
447
- result,
448
- max_traces=options.get("max_traces"),
449
- metadata=options.get("metadata"),
450
- )
451
- if extra:
537
+ tracker.save_artifact(
538
+ trace_id, "analysis_artifacts", artifact_index, artifact_type="json"
539
+ )
540
+ tracker.save_artifact(trace_id, "analysis_report", report_text, artifact_type="text")
541
+ tracker.end_trace(trace_id)
452
542
  console.print(
453
- f"[dim]Recorded {extra} Phoenix RAG trace(s) for detailed observability.[/dim]"
543
+ f"[green]Logged analysis artifacts to {provider.capitalize()}[/green] "
544
+ f"(trace_id: {trace_id})"
454
545
  )
546
+ except Exception as exc:
547
+ print_cli_error(
548
+ console,
549
+ f"{provider.capitalize()} 분석 로깅에 실패했습니다.",
550
+ details=str(exc),
551
+ )
552
+ raise typer.Exit(2) from exc
455
553
 
456
554
 
457
555
  def _save_to_db(
556
+ settings: Settings,
458
557
  db_path: Path | None,
459
558
  result,
460
559
  console: Console,
@@ -463,7 +562,7 @@ def _save_to_db(
463
562
  export_excel: bool = True,
464
563
  ) -> None:
465
564
  """Persist evaluation run (and optional prompt set) to database."""
466
- storage = build_storage_adapter(settings=Settings(), db_path=db_path)
565
+ storage = build_storage_adapter(settings=settings, db_path=db_path)
467
566
  storage_label = (
468
567
  "PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
469
568
  )
@@ -507,6 +606,7 @@ def _save_to_db(
507
606
 
508
607
 
509
608
  def _save_multiturn_to_db(
609
+ settings: Settings,
510
610
  db_path: Path | None,
511
611
  run_record: MultiTurnRunRecord,
512
612
  conversations: list[MultiTurnConversationRecord],
@@ -518,7 +618,7 @@ def _save_multiturn_to_db(
518
618
  metric_thresholds: dict[str, float] | None = None,
519
619
  ) -> None:
520
620
  """Persist multiturn evaluation run to database."""
521
- storage = build_storage_adapter(settings=Settings(), db_path=db_path)
621
+ storage = build_storage_adapter(settings=settings, db_path=db_path)
522
622
  storage_label = (
523
623
  "PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
524
624
  )
@@ -740,6 +840,8 @@ def log_phoenix_traces(
740
840
  return 0
741
841
 
742
842
  limit = max_traces if max_traces is not None else run.total_test_cases
843
+ if not isinstance(limit, int):
844
+ limit = None
743
845
 
744
846
  count = 0
745
847
  for result in run.results:
@@ -1173,8 +1275,10 @@ def _collect_prompt_metadata(
1173
1275
  prompt_path=target,
1174
1276
  content=content,
1175
1277
  )
1176
- summary.content_preview = _build_content_preview(content)
1177
- summaries.append(asdict(summary))
1278
+ summary_dict = asdict(summary)
1279
+ summary_dict["content_preview"] = _build_content_preview(content)
1280
+ summary_dict["content"] = content
1281
+ summaries.append(summary_dict)
1178
1282
 
1179
1283
  return summaries
1180
1284