evalvault 1.74.0-py3-none-any.whl → 1.76.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. evalvault/adapters/inbound/api/adapter.py +127 -80
  2. evalvault/adapters/inbound/api/routers/calibration.py +9 -9
  3. evalvault/adapters/inbound/api/routers/chat.py +303 -17
  4. evalvault/adapters/inbound/api/routers/config.py +3 -1
  5. evalvault/adapters/inbound/api/routers/domain.py +10 -5
  6. evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
  7. evalvault/adapters/inbound/api/routers/runs.py +23 -4
  8. evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
  9. evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
  10. evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
  11. evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
  12. evalvault/adapters/inbound/cli/commands/compare.py +2 -7
  13. evalvault/adapters/inbound/cli/commands/debug.py +3 -2
  14. evalvault/adapters/inbound/cli/commands/domain.py +12 -12
  15. evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
  16. evalvault/adapters/inbound/cli/commands/gate.py +3 -2
  17. evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
  18. evalvault/adapters/inbound/cli/commands/history.py +3 -12
  19. evalvault/adapters/inbound/cli/commands/method.py +3 -4
  20. evalvault/adapters/inbound/cli/commands/ops.py +2 -2
  21. evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
  22. evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
  23. evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
  24. evalvault/adapters/inbound/cli/commands/regress.py +5 -4
  25. evalvault/adapters/inbound/cli/commands/run.py +188 -59
  26. evalvault/adapters/inbound/cli/commands/run_helpers.py +181 -70
  27. evalvault/adapters/inbound/cli/commands/stage.py +6 -25
  28. evalvault/adapters/inbound/cli/utils/options.py +10 -4
  29. evalvault/adapters/inbound/mcp/tools.py +11 -8
  30. evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
  31. evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
  32. evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
  33. evalvault/adapters/outbound/domain_memory/factory.py +68 -0
  34. evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
  35. evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
  36. evalvault/adapters/outbound/llm/factory.py +1 -1
  37. evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
  38. evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
  39. evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
  40. evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
  41. evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
  42. evalvault/adapters/outbound/storage/base_sql.py +3 -2
  43. evalvault/adapters/outbound/storage/factory.py +53 -0
  44. evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
  45. evalvault/adapters/outbound/tracker/mlflow_adapter.py +209 -54
  46. evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
  47. evalvault/config/instrumentation.py +8 -6
  48. evalvault/config/phoenix_support.py +5 -0
  49. evalvault/config/settings.py +71 -11
  50. evalvault/domain/services/domain_learning_hook.py +2 -1
  51. evalvault/domain/services/evaluator.py +2 -0
  52. evalvault/ports/inbound/web_port.py +3 -1
  53. evalvault/ports/outbound/storage_port.py +2 -0
  54. evalvault-1.76.0.dist-info/METADATA +221 -0
  55. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/RECORD +58 -53
  56. evalvault-1.74.0.dist-info/METADATA +0 -585
  57. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/WHEEL +0 -0
  58. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/entry_points.txt +0 -0
  59. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/licenses/LICENSE.md +0 -0
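A recurring change in this release is that call sites which previously constructed `SQLiteStorageAdapter` directly now go through `evalvault.adapters.outbound.storage.factory.build_storage_adapter`, which can also return the new `PostgreSQLStorageAdapter`. The sketch below is illustrative only; it is inferred from the call sites visible in the hunks that follow (`build_storage_adapter(settings=Settings(), db_path=db_path)`, `settings.db_backend`, `settings.evalvault_db_path`), the real factory in the wheel may differ, and the `postgres_dsn` setting name is a placeholder.

```python
# Illustrative sketch of the storage factory's dispatch logic, inferred from
# call sites in this diff; NOT the actual code shipped in evalvault 1.76.0.
from pathlib import Path

from evalvault.adapters.outbound.storage.postgres_adapter import PostgreSQLStorageAdapter
from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
from evalvault.config.settings import Settings


def build_storage_adapter(*, settings: Settings, db_path: Path | None = None):
    """Return a SQLite adapter when a path (or sqlite backend) is configured, else PostgreSQL."""
    if db_path is not None or getattr(settings, "db_backend", "postgres") == "sqlite":
        # The CLI's --db/-D option now defaults to None, so SQLite is only
        # chosen when a path is passed or the backend is set to "sqlite".
        return SQLiteStorageAdapter(db_path=db_path or settings.evalvault_db_path)
    return PostgreSQLStorageAdapter(dsn=settings.postgres_dsn)  # "postgres_dsn" is a hypothetical setting name
```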
@@ -6,7 +6,7 @@ import json
  from collections.abc import Callable, Sequence
  from dataclasses import asdict, dataclass
  from pathlib import Path
- from typing import Any, Literal
+ from typing import Any

  import click
  import typer
@@ -18,13 +18,14 @@ from rich.table import Table
  from evalvault.adapters.outbound.dataset import StreamingConfig, StreamingDatasetLoader
  from evalvault.adapters.outbound.dataset.thresholds import extract_thresholds_from_rows
  from evalvault.adapters.outbound.kg.networkx_adapter import NetworkXKnowledgeGraph
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
+ from evalvault.adapters.outbound.storage.postgres_adapter import PostgreSQLStorageAdapter
  from evalvault.config.phoenix_support import (
      get_phoenix_trace_url,
      instrumentation_span,
      set_span_attributes,
  )
- from evalvault.config.settings import Settings
+ from evalvault.config.settings import Settings, resolve_tracker_providers
  from evalvault.domain.entities import (
      Dataset,
      EvaluationRun,
@@ -57,7 +58,7 @@ from evalvault.ports.outbound.tracker_port import TrackerPort
  from ..utils.console import print_cli_error, print_cli_warning
  from ..utils.formatters import format_score, format_status

- TrackerType = Literal["langfuse", "mlflow", "phoenix", "none"]
+ TrackerType = str
  apply_retriever_to_dataset = retriever_context.apply_retriever_to_dataset


@@ -318,15 +319,22 @@ def _display_memory_insights(insights: dict[str, Any], console: Console) -> None
      console.print(Panel(panel_body, title="Domain Memory Insights", border_style="magenta"))


- def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> TrackerPort | None:
+ def _get_tracker(
+     settings: Settings,
+     tracker_type: str,
+     console: Console,
+     *,
+     required: bool = False,
+ ) -> TrackerPort | None:
      """Get the appropriate tracker adapter based on type."""
      if tracker_type == "langfuse":
          if not settings.langfuse_public_key or not settings.langfuse_secret_key:
-             print_cli_warning(
-                 console,
-                 "Langfuse 자격 증명이 설정되지 않아 로깅을 건너뜁니다.",
-                 tips=["LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY를 .env에 추가하세요."],
-             )
+             message = "Langfuse 자격 증명이 설정되지 않았습니다."
+             tips = ["LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY를 .env에 추가하세요."]
+             if required:
+                 print_cli_error(console, message, fixes=tips)
+                 raise typer.Exit(2)
+             print_cli_warning(console, message + " 로깅을 건너뜁니다.", tips=tips)
              return None
          from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter

@@ -338,11 +346,12 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra

      elif tracker_type == "mlflow":
          if not settings.mlflow_tracking_uri:
-             print_cli_warning(
-                 console,
-                 "MLflow tracking URI가 설정되지 않아 로깅을 건너뜁니다.",
-                 tips=["MLFLOW_TRACKING_URI 환경 변수를 설정하세요."],
-             )
+             message = "MLflow tracking URI가 설정되지 않았습니다."
+             tips = ["MLFLOW_TRACKING_URI 환경 변수를 설정하세요."]
+             if required:
+                 print_cli_error(console, message, fixes=tips)
+                 raise typer.Exit(2)
+             print_cli_warning(console, message + " 로깅을 건너뜁니다.", tips=tips)
              return None
          try:
              from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
@@ -352,11 +361,12 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
                  experiment_name=settings.mlflow_experiment_name,
              )
          except ImportError:
-             print_cli_warning(
-                 console,
-                 "MLflow extra가 설치되지 않았습니다.",
-                 tips=["uv sync --extra mlflow 명령으로 구성요소를 설치하세요."],
-             )
+             message = "MLflow extra가 설치되지 않았습니다."
+             tips = ["uv sync --extra mlflow 명령으로 구성요소를 설치하세요."]
+             if required:
+                 print_cli_error(console, message, fixes=tips)
+                 raise typer.Exit(2)
+             print_cli_warning(console, message, tips=tips)
              return None

      elif tracker_type == "phoenix":
@@ -366,13 +376,16 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
              return PhoenixAdapter(
                  endpoint=settings.phoenix_endpoint,
                  service_name="evalvault",
+                 project_name=getattr(settings, "phoenix_project_name", None),
+                 annotations_enabled=getattr(settings, "phoenix_annotations_enabled", True),
              )
          except ImportError:
-             print_cli_warning(
-                 console,
-                 "Phoenix extra가 설치되지 않았습니다.",
-                 tips=["uv sync --extra phoenix 명령으로 의존성을 추가하세요."],
-             )
+             message = "Phoenix extra가 설치되지 않았습니다."
+             tips = ["uv sync --extra phoenix 명령으로 의존성을 추가하세요."]
+             if required:
+                 print_cli_error(console, message, fixes=tips)
+                 raise typer.Exit(2)
+             print_cli_warning(console, message, tips=tips)
              return None

      else:
@@ -384,6 +397,22 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
          return None


+ def _resolve_tracker_list(tracker_type: str) -> list[str]:
+     providers = resolve_tracker_providers(tracker_type)
+     if not providers:
+         return []
+     if providers == ["none"]:
+         return ["none"]
+     supported = {"langfuse", "mlflow", "phoenix"}
+     unknown = [entry for entry in providers if entry not in supported]
+     if unknown:
+         raise ValueError(f"Unknown tracker provider(s): {', '.join(unknown)}")
+     required = {"mlflow", "phoenix"}
+     if not required.issubset(set(providers)):
+         raise ValueError("tracker must include both 'mlflow' and 'phoenix'")
+     return providers
+
+
  def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
      """Build a Phoenix UI URL for the given trace ID."""

@@ -394,7 +423,7 @@ def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
      return f"{base.rstrip('/')}/#/traces/{trace_id}"


- def _log_to_tracker(
+ def _log_to_trackers(
      settings: Settings,
      result,
      console: Console,
@@ -403,18 +432,39 @@
      phoenix_options: dict[str, Any] | None = None,
      log_phoenix_traces_fn: Callable[..., int] | None = None,
  ) -> None:
-     """Log evaluation results to the specified tracker."""
-     tracker = _get_tracker(settings, tracker_type, console)
-     if tracker is None:
+     """Log evaluation results to the specified tracker(s)."""
+     try:
+         tracker_types = _resolve_tracker_list(tracker_type)
+     except ValueError as exc:
+         print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
+         raise typer.Exit(2) from exc
+     if not tracker_types or tracker_types == ["none"]:
          return

-     tracker_name = tracker_type.capitalize()
-     trace_id: str | None = None
-     with console.status(f"[bold green]Logging to {tracker_name}..."):
-         try:
-             trace_id = tracker.log_evaluation_run(result)
-             console.print(f"[green]Logged to {tracker_name}[/green] (trace_id: {trace_id})")
-             if trace_id and tracker_type == "phoenix":
+     result.tracker_metadata.setdefault("tracker_providers", tracker_types)
+     for provider in tracker_types:
+         tracker = _get_tracker(settings, provider, console, required=True)
+         if tracker is None:
+             raise typer.Exit(2)
+         tracker_name = provider.capitalize()
+         trace_id: str | None = None
+         with console.status(f"[bold green]Logging to {tracker_name}..."):
+             try:
+                 trace_id = tracker.log_evaluation_run(result)
+                 console.print(f"[green]Logged to {tracker_name}[/green] (trace_id: {trace_id})")
+             except Exception as exc:
+                 print_cli_error(
+                     console,
+                     f"{tracker_name} 로깅에 실패했습니다.",
+                     details=str(exc),
+                 )
+                 raise typer.Exit(2) from exc
+
+         if trace_id:
+             provider_meta = result.tracker_metadata.setdefault(provider, {})
+             if isinstance(provider_meta, dict):
+                 provider_meta.setdefault("trace_id", trace_id)
+         if provider == "phoenix":
              endpoint = getattr(settings, "phoenix_endpoint", "http://localhost:6006/v1/traces")
              if not isinstance(endpoint, str) or not endpoint:
                  endpoint = "http://localhost:6006/v1/traces"
@@ -430,42 +480,96 @@
              trace_url = get_phoenix_trace_url(result.tracker_metadata)
              if trace_url:
                  console.print(f"[dim]Phoenix Trace: {trace_url}[/dim]")
-         except Exception as exc:  # pragma: no cover - telemetry best-effort
-             print_cli_warning(
-                 console,
-                 f"{tracker_name} 로깅에 실패했습니다.",
-                 tips=[str(exc)],
+
+             options = phoenix_options or {}
+             log_traces = log_phoenix_traces_fn or log_phoenix_traces
+             extra = log_traces(
+                 tracker,
+                 result,
+                 max_traces=options.get("max_traces"),
+                 metadata=options.get("metadata"),
+             )
+             if extra:
+                 console.print(
+                     f"[dim]Recorded {extra} Phoenix RAG trace(s) for detailed observability.[/dim]"
+                 )
+
+
+ def _log_analysis_artifacts(
+     settings: Settings,
+     result: EvaluationRun,
+     console: Console,
+     tracker_type: str,
+     *,
+     analysis_payload: dict[str, Any],
+     artifact_index: dict[str, Any],
+     report_text: str,
+     output_path: Path,
+     report_path: Path,
+ ) -> None:
+     """Log analysis artifacts to tracker(s) as a separate trace/run."""
+     try:
+         tracker_types = _resolve_tracker_list(tracker_type)
+     except ValueError as exc:
+         print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
+         raise typer.Exit(2) from exc
+     if not tracker_types or tracker_types == ["none"]:
+         return
+
+     metadata = {
+         "run_id": result.run_id,
+         "dataset_name": result.dataset_name,
+         "dataset_version": result.dataset_version,
+         "analysis_output": str(output_path),
+         "analysis_report": str(report_path),
+         "analysis_artifacts_dir": artifact_index.get("dir"),
+         "event_type": "analysis",
+     }
+
+     for provider in tracker_types:
+         tracker = _get_tracker(settings, provider, console, required=True)
+         if tracker is None:
+             raise typer.Exit(2)
+         trace_name = f"analysis-{result.run_id[:8]}"
+         try:
+             trace_id = tracker.start_trace(trace_name, metadata=metadata)
+             tracker.save_artifact(
+                 trace_id, "analysis_payload", analysis_payload, artifact_type="json"
              )
-             return
-
-     if tracker_type == "phoenix":
-         options = phoenix_options or {}
-         log_traces = log_phoenix_traces_fn or log_phoenix_traces
-         extra = log_traces(
-             tracker,
-             result,
-             max_traces=options.get("max_traces"),
-             metadata=options.get("metadata"),
-         )
-         if extra:
+             tracker.save_artifact(
+                 trace_id, "analysis_artifacts", artifact_index, artifact_type="json"
+             )
+             tracker.save_artifact(trace_id, "analysis_report", report_text, artifact_type="text")
+             tracker.end_trace(trace_id)
              console.print(
-                 f"[dim]Recorded {extra} Phoenix RAG trace(s) for detailed observability.[/dim]"
+                 f"[green]Logged analysis artifacts to {provider.capitalize()}[/green] "
+                 f"(trace_id: {trace_id})"
              )
+         except Exception as exc:
+             print_cli_error(
+                 console,
+                 f"{provider.capitalize()} 분석 로깅에 실패했습니다.",
+                 details=str(exc),
+             )
+             raise typer.Exit(2) from exc


  def _save_to_db(
-     db_path: Path,
+     db_path: Path | None,
      result,
      console: Console,
      *,
-     storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
      prompt_bundle: PromptSetBundle | None = None,
      export_excel: bool = True,
  ) -> None:
-     """Persist evaluation run (and optional prompt set) to SQLite database."""
-     with console.status(f"[bold green]Saving to database {db_path}..."):
+     """Persist evaluation run (and optional prompt set) to database."""
+     storage = build_storage_adapter(settings=Settings(), db_path=db_path)
+     storage_label = (
+         "PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
+     )
+     export_base = db_path.parent if db_path else Path("data/exports")
+     with console.status(f"[bold green]Saving to database {storage_label}..."):
          try:
-             storage = storage_cls(db_path=db_path)
              if prompt_bundle:
                  storage.save_prompt_set(prompt_bundle)
              storage.save_run(result)
@@ -475,7 +579,8 @@
                      prompt_bundle.prompt_set.prompt_set_id,
                  )
              if export_excel:
-                 excel_path = db_path.parent / f"evalvault_run_{result.run_id}.xlsx"
+                 export_base.mkdir(parents=True, exist_ok=True)
+                 excel_path = export_base / f"evalvault_run_{result.run_id}.xlsx"
                  try:
                      storage.export_run_to_excel(result.run_id, excel_path)
                      console.print(f"[green]Excel export saved: {excel_path}[/green]")
@@ -485,7 +590,7 @@
                          "엑셀 내보내기에 실패했습니다.",
                          tips=[str(exc)],
                      )
-             console.print(f"[green]Results saved to database: {db_path}[/green]")
+             console.print(f"[green]Results saved to database: {storage_label}[/green]")
              console.print(f"[dim]Run ID: {result.run_id}[/dim]")
              if prompt_bundle:
                  console.print(
@@ -502,21 +607,24 @@


  def _save_multiturn_to_db(
-     db_path: Path,
+     db_path: Path | None,
      run_record: MultiTurnRunRecord,
      conversations: list[MultiTurnConversationRecord],
      turn_results: list[MultiTurnTurnResult],
      console: Console,
      *,
-     storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
      export_excel: bool = True,
      excel_output_path: Path | None = None,
      metric_thresholds: dict[str, float] | None = None,
  ) -> None:
-     """Persist multiturn evaluation run to SQLite database."""
-     with console.status(f"[bold green]Saving multiturn run to {db_path}..."):
+     """Persist multiturn evaluation run to database."""
+     storage = build_storage_adapter(settings=Settings(), db_path=db_path)
+     storage_label = (
+         "PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
+     )
+     export_base = db_path.parent if db_path else Path("data/exports")
+     with console.status(f"[bold green]Saving multiturn run to {storage_label}..."):
          try:
-             storage = storage_cls(db_path=db_path)
              storage.save_multiturn_run(
                  run_record,
                  conversations,
@@ -524,8 +632,9 @@
                  metric_thresholds=metric_thresholds,
              )
              if export_excel:
+                 export_base.mkdir(parents=True, exist_ok=True)
                  excel_path = excel_output_path or (
-                     db_path.parent / f"evalvault_multiturn_{run_record.run_id}.xlsx"
+                     export_base / f"evalvault_multiturn_{run_record.run_id}.xlsx"
                  )
                  try:
                      storage.export_multiturn_run_to_excel(run_record.run_id, excel_path)
@@ -536,7 +645,7 @@
                          "멀티턴 엑셀 내보내기에 실패했습니다.",
                          tips=[str(exc)],
                      )
-             console.print(f"[green]Multiturn results saved to database: {db_path}[/green]")
+             console.print(f"[green]Multiturn results saved to database: {storage_label}[/green]")
              console.print(f"[dim]Run ID: {run_record.run_id}[/dim]")
          except Exception as exc:  # pragma: no cover - persistence errors
              print_cli_error(
@@ -1164,8 +1273,10 @@
              prompt_path=target,
              content=content,
          )
-         summary.content_preview = _build_content_preview(content)
-         summaries.append(asdict(summary))
+         summary_dict = asdict(summary)
+         summary_dict["content_preview"] = _build_content_preview(content)
+         summary_dict["content"] = content
+         summaries.append(summary_dict)

      return summaries

@@ -16,7 +16,7 @@ from rich.table import Table
  from evalvault.adapters.outbound.improvement.stage_metric_playbook_loader import (
      StageMetricPlaybookLoader,
  )
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
  from evalvault.config.settings import Settings
  from evalvault.domain.entities.stage import REQUIRED_STAGE_TYPES, StageEvent, StageMetric
  from evalvault.domain.services.stage_metric_guide_service import StageMetricGuideService
@@ -28,13 +28,6 @@ from ..utils.options import db_option
  logger = logging.getLogger(__name__)


- def _resolve_db_path(db_path: Path | None) -> Path:
-     resolved = db_path or Settings().evalvault_db_path
-     if resolved is None:
-         raise typer.BadParameter("Database path is not configured.")
-     return resolved
-
-
  @dataclass
  class ValidationStats:
      """Tracks StageEvent validation failures by error type."""
@@ -122,8 +115,7 @@ def create_stage_app(console: Console) -> typer.Typer:
              console.print("[yellow]No valid stage events found in the input file.[/yellow]")
              raise typer.Exit(1)

-         resolved_db_path = _resolve_db_path(db_path)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)
          stored = storage.save_stage_events(events)

          console.print(f"[green]Stored {stored} stage event(s).[/green]")
@@ -147,8 +139,7 @@ def create_stage_app(console: Console) -> typer.Typer:
          db_path: Path | None = db_option(help_text="Path to database file."),
      ) -> None:
          """List stage events for a run."""
-         resolved_db_path = _resolve_db_path(db_path)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)
          events = storage.list_stage_events(run_id, stage_type=stage_type)

          if not events:
@@ -184,8 +175,7 @@ def create_stage_app(console: Console) -> typer.Typer:
          db_path: Path | None = db_option(help_text="Path to database file."),
      ) -> None:
          """Show summary stats for stage events."""
-         resolved_db_path = _resolve_db_path(db_path)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)
          events = storage.list_stage_events(run_id)
          if not events:
              console.print("[yellow]No stage events found.[/yellow]")
@@ -218,8 +208,7 @@ def create_stage_app(console: Console) -> typer.Typer:
          db_path: Path | None = db_option(help_text="Path to database file."),
      ) -> None:
          """Compute stage metrics from stored events."""
-         resolved_db_path = _resolve_db_path(db_path)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)
          events = storage.list_stage_events(run_id)
          if not events:
              console.print("[yellow]No stage events found.[/yellow]")
@@ -276,8 +265,7 @@ def create_stage_app(console: Console) -> typer.Typer:
          db_path: Path | None = db_option(help_text="Path to database file."),
      ) -> None:
          """Report stage summary, metrics, and improvement guides."""
-         resolved_db_path = _resolve_db_path(db_path)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)
          events = storage.list_stage_events(run_id)
          if not events:
              console.print("[yellow]No stage events found.[/yellow]")
@@ -547,13 +535,6 @@ def _load_default_profile() -> str | None:
      return None


- def _resolve_db_path(db_path: Path | None) -> Path:
-     resolved = db_path or Settings().evalvault_db_path
-     if resolved is None:
-         raise typer.BadParameter("Database path is not configured.")
-     return resolved
-
-
  def _print_stage_summary(console: Console, summary_data) -> None:
      table = Table(show_header=True, header_style="bold cyan")
      table.add_column("Stage Type")
@@ -31,11 +31,11 @@ def profile_option(
  def db_option(
      *,
      default: str | Path | None = _UNSET,
-     help_text: str = "Path to SQLite database file.",
+     help_text: str = "SQLite DB path (PostgreSQL is default when omitted).",
  ) -> Path | None:
      """Shared --db / -D option definition."""

-     resolved_default = Settings().evalvault_db_path if default is _UNSET else default
+     resolved_default = None if default is _UNSET else default
      normalized_default = _normalize_path(resolved_default)
      return typer.Option(
          normalized_default,
@@ -49,11 +49,17 @@ def db_option(
  def memory_db_option(
      *,
      default: str | Path | None = _UNSET,
-     help_text: str = "Path to Domain Memory SQLite database.",
+     help_text: str = "Domain Memory SQLite path (Postgres is default when omitted).",
  ) -> Path | None:
      """Shared option factory for the domain memory database path."""

-     resolved_default = Settings().evalvault_memory_db_path if default is _UNSET else default
+     if default is _UNSET:
+         settings = Settings()
+         resolved_default = (
+             settings.evalvault_memory_db_path if settings.db_backend == "sqlite" else None
+         )
+     else:
+         resolved_default = default
      normalized_default = _normalize_path(resolved_default)
      return typer.Option(
          normalized_default,
@@ -20,12 +20,13 @@ from evalvault.adapters.outbound.analysis.pipeline_factory import build_analysis
  from evalvault.adapters.outbound.analysis.statistical_adapter import StatisticalAnalysisAdapter
  from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
  from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
  from evalvault.config.settings import Settings, apply_profile
  from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
  from evalvault.domain.services.analysis_service import AnalysisService
  from evalvault.domain.services.evaluator import RagasEvaluator
  from evalvault.ports.inbound.web_port import EvalRequest, RunFilters, RunSummary
+ from evalvault.ports.outbound.storage_port import StoragePort

  from .schemas import (
      AnalyzeCompareRequest,
@@ -82,7 +83,7 @@ def list_runs(payload: dict[str, Any] | ListRunsRequest) -> ListRunsResponse:
              errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)]
          )

-     storage = SQLiteStorageAdapter(db_path=db_path)
+     storage = build_storage_adapter(settings=Settings(), db_path=db_path)
      adapter = WebUIAdapter(storage=storage, settings=Settings())

      filters = RunFilters(
@@ -123,7 +124,7 @@ def get_run_summary(payload: dict[str, Any] | GetRunSummaryRequest) -> GetRunSum
              errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)]
          )

-     storage = SQLiteStorageAdapter(db_path=db_path)
+     storage = build_storage_adapter(settings=Settings(), db_path=db_path)
      try:
          run = storage.get_run(request.run_id)
      except KeyError as exc:
@@ -175,7 +176,7 @@ def run_evaluation(payload: dict[str, Any] | RunEvaluationRequest) -> RunEvaluat
              errors=[_error("EVAL_LLM_INIT_FAILED", str(exc), stage=ErrorStage.evaluate)],
          )

-     storage = SQLiteStorageAdapter(db_path=db_path)
+     storage = build_storage_adapter(settings=Settings(), db_path=db_path)
      llm_factory = SettingsLLMFactory(settings)
      korean_toolkit = try_create_korean_toolkit()
      evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
@@ -266,7 +267,7 @@ def analyze_compare(payload: dict[str, Any] | AnalyzeCompareRequest) -> AnalyzeC
              errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)],
          )

-     storage = SQLiteStorageAdapter(db_path=db_path)
+     storage = build_storage_adapter(settings=Settings(), db_path=db_path)
      try:
          run_a = storage.get_run(request.run_id_a)
          run_b = storage.get_run(request.run_id_b)
@@ -503,9 +504,11 @@ def _serialize_run_summary(summary: RunSummary) -> RunSummaryPayload:
      return RunSummaryPayload.model_validate(payload)


- def _resolve_db_path(db_path: Path | None) -> Path:
+ def _resolve_db_path(db_path: Path | None) -> Path | None:
+     settings = Settings()
      if db_path is None:
-         settings = Settings()
+         if getattr(settings, "db_backend", "postgres") != "sqlite":
+             return None
          db_path = Path(settings.evalvault_db_path)
      resolved = db_path.expanduser().resolve()
      _ensure_allowed_path(resolved)
@@ -547,7 +550,7 @@ def _run_auto_analysis(
      *,
      run_id: str,
      run: Any,
-     storage: SQLiteStorageAdapter,
+     storage: StoragePort,
      llm_adapter: Any,
      analysis_output: Path | None,
      analysis_report: Path | None,
@@ -152,6 +152,20 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
                  errors.append(str(exc))
                  retriever = None

+         if retriever is None and (backend_hint == "vllm" or embedding_profile == "vllm"):
+             try:
+                 from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
+
+                 adapter = VLLMAdapter(settings)
+                 retriever = KoreanDenseRetriever(
+                     model_name=model_name or settings.vllm_embedding_model,
+                     ollama_adapter=adapter,
+                     profile=embedding_profile,
+                 )
+             except Exception as exc:
+                 errors.append(str(exc))
+                 retriever = None
+
          if retriever is None and backend_hint != "ollama":
              try:
                  retriever = KoreanDenseRetriever(model_name=model_name)
@@ -166,7 +180,9 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
              batch_size=batch_size if isinstance(batch_size, int) else None,
          )
          meta = {
-             "backend": "ollama"
+             "backend": "vllm"
+             if backend_hint == "vllm" or embedding_profile == "vllm"
+             else "ollama"
              if retriever.model_name.startswith("qwen3")
              else "sentence-transformers",
              "model": retriever.model_name,
@@ -77,6 +77,20 @@ class EmbeddingSearcherModule(BaseAnalysisModule):
                  errors.append(str(exc))
                  retriever = None

+         if retriever is None and embedding_profile == "vllm":
+             try:
+                 from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
+
+                 adapter = VLLMAdapter(settings)
+                 retriever = KoreanDenseRetriever(
+                     model_name=settings.vllm_embedding_model,
+                     ollama_adapter=adapter,
+                     profile=embedding_profile,
+                 )
+             except Exception as exc:
+                 errors.append(str(exc))
+                 retriever = None
+
          if retriever is None:
              try:
                  retriever = KoreanDenseRetriever(model_name=model_name)
@@ -1,7 +1,11 @@
  """Domain Memory adapters for factual, experiential, and working memory layers."""

- from evalvault.adapters.outbound.domain_memory.sqlite_adapter import (
-     SQLiteDomainMemoryAdapter,
- )
+ from evalvault.adapters.outbound.domain_memory.factory import build_domain_memory_adapter
+ from evalvault.adapters.outbound.domain_memory.postgres_adapter import PostgresDomainMemoryAdapter
+ from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter

- __all__ = ["SQLiteDomainMemoryAdapter"]
+ __all__ = [
+     "SQLiteDomainMemoryAdapter",
+     "PostgresDomainMemoryAdapter",
+     "build_domain_memory_adapter",
+ ]
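The tracker-related hunks above replace the single-tracker `_log_to_tracker` with `_log_to_trackers`, which resolves the `--tracker` value via `resolve_tracker_providers` and then insists that both `mlflow` and `phoenix` are included. `resolve_tracker_providers` itself is defined in `evalvault.config.settings` and is not part of this diff; below is a minimal sketch, assuming it simply splits a comma-separated value.

```python
# Minimal sketch, assuming resolve_tracker_providers splits a comma-separated
# string; the real implementation lives in evalvault.config.settings and is
# not shown in this diff.
def resolve_tracker_providers(tracker_type: str) -> list[str]:
    providers = [entry.strip().lower() for entry in tracker_type.split(",") if entry.strip()]
    return ["none"] if providers == ["none"] else providers


# _resolve_tracker_list (see the hunks above) then validates the result:
# unknown providers raise ValueError, and both "mlflow" and "phoenix" must be
# present, otherwise the CLI aborts with exit code 2.
print(resolve_tracker_providers("mlflow,phoenix"))  # ['mlflow', 'phoenix']
```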