evalvault 1.73.2__py3-none-any.whl → 1.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. evalvault/adapters/inbound/api/adapter.py +66 -17
  2. evalvault/adapters/inbound/api/routers/calibration.py +9 -9
  3. evalvault/adapters/inbound/api/routers/chat.py +604 -37
  4. evalvault/adapters/inbound/api/routers/domain.py +10 -5
  5. evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
  6. evalvault/adapters/inbound/api/routers/runs.py +23 -4
  7. evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
  8. evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
  9. evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
  10. evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
  11. evalvault/adapters/inbound/cli/commands/compare.py +2 -7
  12. evalvault/adapters/inbound/cli/commands/debug.py +3 -2
  13. evalvault/adapters/inbound/cli/commands/domain.py +12 -12
  14. evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
  15. evalvault/adapters/inbound/cli/commands/gate.py +3 -2
  16. evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
  17. evalvault/adapters/inbound/cli/commands/history.py +3 -12
  18. evalvault/adapters/inbound/cli/commands/method.py +1 -2
  19. evalvault/adapters/inbound/cli/commands/ops.py +2 -2
  20. evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
  21. evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
  22. evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
  23. evalvault/adapters/inbound/cli/commands/regress.py +5 -4
  24. evalvault/adapters/inbound/cli/commands/run.py +42 -31
  25. evalvault/adapters/inbound/cli/commands/run_helpers.py +24 -15
  26. evalvault/adapters/inbound/cli/commands/stage.py +6 -25
  27. evalvault/adapters/inbound/cli/utils/options.py +10 -4
  28. evalvault/adapters/inbound/mcp/tools.py +11 -8
  29. evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
  30. evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
  31. evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
  32. evalvault/adapters/outbound/domain_memory/factory.py +68 -0
  33. evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
  34. evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
  35. evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
  36. evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
  37. evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
  38. evalvault/adapters/outbound/ops/__init__.py +5 -0
  39. evalvault/adapters/outbound/ops/report_renderer.py +159 -0
  40. evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
  41. evalvault/adapters/outbound/storage/base_sql.py +3 -2
  42. evalvault/adapters/outbound/storage/factory.py +53 -0
  43. evalvault/adapters/outbound/storage/postgres_adapter.py +90 -0
  44. evalvault/adapters/outbound/storage/postgres_schema.sql +15 -0
  45. evalvault/adapters/outbound/storage/schema.sql +14 -0
  46. evalvault/adapters/outbound/storage/sqlite_adapter.py +77 -0
  47. evalvault/config/settings.py +31 -7
  48. evalvault/domain/entities/ops_report.py +40 -0
  49. evalvault/domain/services/domain_learning_hook.py +2 -1
  50. evalvault/domain/services/ops_report_service.py +192 -0
  51. evalvault/ports/inbound/web_port.py +3 -1
  52. evalvault/ports/outbound/storage_port.py +2 -0
  53. evalvault-1.75.0.dist-info/METADATA +221 -0
  54. {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/RECORD +57 -48
  55. evalvault-1.73.2.dist-info/METADATA +0 -585
  56. {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/WHEEL +0 -0
  57. {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/entry_points.txt +0 -0
  58. {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -21,6 +21,7 @@ from evalvault.adapters.outbound.analysis import (
21
21
  )
22
22
  from evalvault.adapters.outbound.cache import MemoryCacheAdapter
23
23
  from evalvault.adapters.outbound.judge_calibration_reporter import JudgeCalibrationReporter
24
+ from evalvault.adapters.outbound.ops.report_renderer import render_json, render_markdown
24
25
  from evalvault.adapters.outbound.report import MarkdownReportAdapter
25
26
  from evalvault.config.phoenix_support import PhoenixExperimentResolver
26
27
  from evalvault.config.settings import Settings
@@ -43,6 +44,7 @@ from evalvault.domain.services.analysis_service import AnalysisService
43
44
  from evalvault.domain.services.cluster_map_builder import build_cluster_map
44
45
  from evalvault.domain.services.debug_report_service import DebugReportService
45
46
  from evalvault.domain.services.judge_calibration_service import JudgeCalibrationService
47
+ from evalvault.domain.services.ops_report_service import OpsReportService
46
48
  from evalvault.domain.services.prompt_registry import (
47
49
  PromptInput,
48
50
  build_prompt_bundle,
@@ -125,14 +127,15 @@ class WebUIAdapter:
125
127
  llm_adapter: LLM 어댑터 (선택적)
126
128
  data_loader: 데이터 로더 (선택적)
127
129
  """
128
- resolved_settings = settings
130
+ resolved_settings = settings or Settings()
129
131
  if storage is None:
130
- resolved_settings = settings or Settings()
131
- db_path = getattr(resolved_settings, "evalvault_db_path", None)
132
- if db_path:
133
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
132
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
134
133
 
135
- storage = SQLiteStorageAdapter(db_path=db_path)
134
+ try:
135
+ storage = build_storage_adapter(settings=resolved_settings)
136
+ except Exception as exc:
137
+ logger.warning("Storage initialization failed: %s", exc)
138
+ storage = None
136
139
 
137
140
  self._storage = storage
138
141
  self._evaluator = evaluator
@@ -448,7 +451,12 @@ class WebUIAdapter:
448
451
  memory_domain = memory_config.get("domain") or dataset.metadata.get("domain") or "default"
449
452
  memory_language = memory_config.get("language") or "ko"
450
453
  memory_augment = bool(memory_config.get("augment_context"))
451
- memory_db_path = memory_config.get("db_path") or settings.evalvault_memory_db_path
454
+ if memory_config.get("db_path"):
455
+ memory_db_path = memory_config.get("db_path")
456
+ elif settings.db_backend == "sqlite":
457
+ memory_db_path = settings.evalvault_memory_db_path
458
+ else:
459
+ memory_db_path = None
452
460
  memory_evaluator = None
453
461
  requested_thresholds = request.thresholds or {}
454
462
  if request.threshold_profile or requested_thresholds:
@@ -470,16 +478,17 @@ class WebUIAdapter:
470
478
  memory_active = False
471
479
  if memory_enabled:
472
480
  try:
473
- from evalvault.adapters.outbound.domain_memory.sqlite_adapter import (
474
- SQLiteDomainMemoryAdapter,
475
- )
481
+ from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
476
482
  from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import (
477
483
  PhoenixTracerAdapter,
478
484
  )
479
485
  from evalvault.domain.services.memory_aware_evaluator import MemoryAwareEvaluator
480
486
 
481
487
  tracer = PhoenixTracerAdapter() if tracker_provider == "phoenix" else None
482
- memory_adapter = SQLiteDomainMemoryAdapter(memory_db_path)
488
+ memory_adapter = build_domain_memory_adapter(
489
+ settings=self._settings,
490
+ db_path=Path(memory_db_path) if memory_db_path else None,
491
+ )
483
492
  memory_evaluator = MemoryAwareEvaluator(
484
493
  evaluator=self._evaluator,
485
494
  memory_port=memory_adapter,
@@ -812,6 +821,7 @@ class WebUIAdapter:
812
821
  def list_runs(
813
822
  self,
814
823
  limit: int = 50,
824
+ offset: int = 0,
815
825
  filters: RunFilters | None = None,
816
826
  ) -> list[RunSummary]:
817
827
  """평가 목록 조회.
@@ -831,7 +841,7 @@ class WebUIAdapter:
831
841
 
832
842
  try:
833
843
  # 저장소에서 평가 목록 조회
834
- runs = self._storage.list_runs(limit=limit)
844
+ runs = self._storage.list_runs(limit=limit, offset=offset)
835
845
 
836
846
  # RunSummary로 변환
837
847
  summaries = []
@@ -1027,7 +1037,11 @@ class WebUIAdapter:
1027
1037
  run = self.get_run_details(run_id)
1028
1038
  feedbacks = storage.list_feedback(run_id)
1029
1039
  if labels_source in {"feedback", "hybrid"} and not feedbacks:
1030
- raise ValueError("Feedback labels are required for this labels_source")
1040
+ raise ValueError(
1041
+ f"No feedback labels found for run '{run_id}'. "
1042
+ f"Calibration with labels_source='{labels_source}' requires at least one feedback label. "
1043
+ "Please add feedback labels via the UI or API, or use labels_source='gold' if gold labels are available."
1044
+ )
1031
1045
  resolved_metrics = metrics or list(run.metrics_evaluated)
1032
1046
  if not resolved_metrics:
1033
1047
  raise ValueError("No metrics available for calibration")
@@ -1329,6 +1343,42 @@ class WebUIAdapter:
1329
1343
  stage_storage=stage_storage,
1330
1344
  )
1331
1345
 
1346
+ def generate_ops_report(
1347
+ self,
1348
+ run_id: str,
1349
+ *,
1350
+ output_format: str,
1351
+ save: bool,
1352
+ ) -> dict[str, Any] | str:
1353
+ if self._storage is None:
1354
+ raise RuntimeError("Storage not configured")
1355
+ if not hasattr(self._storage, "list_stage_events"):
1356
+ raise RuntimeError("Stage storage not configured")
1357
+
1358
+ service = OpsReportService()
1359
+ stage_storage = cast(StageStoragePort, self._storage)
1360
+ report = service.build_report(
1361
+ run_id,
1362
+ storage=self._storage,
1363
+ stage_storage=stage_storage,
1364
+ )
1365
+
1366
+ content = render_markdown(report) if output_format == "markdown" else render_json(report)
1367
+
1368
+ if save:
1369
+ self._storage.save_ops_report(
1370
+ report_id=None,
1371
+ run_id=run_id,
1372
+ report_type="ops_report",
1373
+ format=output_format,
1374
+ content=content,
1375
+ metadata={"source": "api"},
1376
+ )
1377
+
1378
+ if output_format == "markdown":
1379
+ return content
1380
+ return report.to_dict()
1381
+
1332
1382
  def delete_run(self, run_id: str) -> bool:
1333
1383
  """평가 삭제.
1334
1384
 
@@ -2160,16 +2210,15 @@ def create_adapter() -> WebUIAdapter:
2160
2210
  """
2161
2211
  from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
2162
2212
  from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
2163
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
2213
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
2164
2214
  from evalvault.config.settings import get_settings
2165
2215
  from evalvault.domain.services.evaluator import RagasEvaluator
2166
2216
 
2167
2217
  # 설정 로드
2168
2218
  settings = get_settings()
2169
2219
 
2170
- # Storage 생성 (기본 SQLite)
2171
- db_path = Path(settings.evalvault_db_path)
2172
- storage = SQLiteStorageAdapter(db_path=db_path)
2220
+ # Storage 생성
2221
+ storage = build_storage_adapter(settings=settings)
2173
2222
 
2174
2223
  # LLM adapter 생성 (API 키 없으면 None)
2175
2224
  llm_adapter = None
@@ -113,6 +113,15 @@ def run_judge_calibration(
113
113
  return JudgeCalibrationResponse.model_validate(payload)
114
114
 
115
115
 
116
+ @router.get("/judge/history", response_model=list[JudgeCalibrationHistoryItem])
117
+ def list_calibrations(
118
+ adapter: AdapterDep,
119
+ limit: int = Query(20, ge=1, le=200),
120
+ ) -> list[JudgeCalibrationHistoryItem]:
121
+ entries = adapter.list_judge_calibrations(limit=limit)
122
+ return [JudgeCalibrationHistoryItem.model_validate(entry) for entry in entries]
123
+
124
+
116
125
  @router.get("/judge/{calibration_id}", response_model=JudgeCalibrationResponse)
117
126
  def get_calibration_result(calibration_id: str, adapter: AdapterDep) -> JudgeCalibrationResponse:
118
127
  try:
@@ -122,12 +131,3 @@ def get_calibration_result(calibration_id: str, adapter: AdapterDep) -> JudgeCal
122
131
  except RuntimeError as exc:
123
132
  raise HTTPException(status_code=500, detail=str(exc)) from exc
124
133
  return JudgeCalibrationResponse.model_validate(payload)
125
-
126
-
127
- @router.get("/judge/history", response_model=list[JudgeCalibrationHistoryItem])
128
- def list_calibrations(
129
- adapter: AdapterDep,
130
- limit: int = Query(20, ge=1, le=200),
131
- ) -> list[JudgeCalibrationHistoryItem]:
132
- entries = adapter.list_judge_calibrations(limit=limit)
133
- return [JudgeCalibrationHistoryItem.model_validate(entry) for entry in entries]