evalvault 1.74.0-py3-none-any.whl → 1.76.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. evalvault/adapters/inbound/api/adapter.py +127 -80
  2. evalvault/adapters/inbound/api/routers/calibration.py +9 -9
  3. evalvault/adapters/inbound/api/routers/chat.py +303 -17
  4. evalvault/adapters/inbound/api/routers/config.py +3 -1
  5. evalvault/adapters/inbound/api/routers/domain.py +10 -5
  6. evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
  7. evalvault/adapters/inbound/api/routers/runs.py +23 -4
  8. evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
  9. evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
  10. evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
  11. evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
  12. evalvault/adapters/inbound/cli/commands/compare.py +2 -7
  13. evalvault/adapters/inbound/cli/commands/debug.py +3 -2
  14. evalvault/adapters/inbound/cli/commands/domain.py +12 -12
  15. evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
  16. evalvault/adapters/inbound/cli/commands/gate.py +3 -2
  17. evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
  18. evalvault/adapters/inbound/cli/commands/history.py +3 -12
  19. evalvault/adapters/inbound/cli/commands/method.py +3 -4
  20. evalvault/adapters/inbound/cli/commands/ops.py +2 -2
  21. evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
  22. evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
  23. evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
  24. evalvault/adapters/inbound/cli/commands/regress.py +5 -4
  25. evalvault/adapters/inbound/cli/commands/run.py +188 -59
  26. evalvault/adapters/inbound/cli/commands/run_helpers.py +181 -70
  27. evalvault/adapters/inbound/cli/commands/stage.py +6 -25
  28. evalvault/adapters/inbound/cli/utils/options.py +10 -4
  29. evalvault/adapters/inbound/mcp/tools.py +11 -8
  30. evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
  31. evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
  32. evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
  33. evalvault/adapters/outbound/domain_memory/factory.py +68 -0
  34. evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
  35. evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
  36. evalvault/adapters/outbound/llm/factory.py +1 -1
  37. evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
  38. evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
  39. evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
  40. evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
  41. evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
  42. evalvault/adapters/outbound/storage/base_sql.py +3 -2
  43. evalvault/adapters/outbound/storage/factory.py +53 -0
  44. evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
  45. evalvault/adapters/outbound/tracker/mlflow_adapter.py +209 -54
  46. evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
  47. evalvault/config/instrumentation.py +8 -6
  48. evalvault/config/phoenix_support.py +5 -0
  49. evalvault/config/settings.py +71 -11
  50. evalvault/domain/services/domain_learning_hook.py +2 -1
  51. evalvault/domain/services/evaluator.py +2 -0
  52. evalvault/ports/inbound/web_port.py +3 -1
  53. evalvault/ports/outbound/storage_port.py +2 -0
  54. evalvault-1.76.0.dist-info/METADATA +221 -0
  55. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/RECORD +58 -53
  56. evalvault-1.74.0.dist-info/METADATA +0 -585
  57. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/WHEEL +0 -0
  58. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/entry_points.txt +0 -0
  59. {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -8,6 +8,7 @@ import os
  import re
  import time
  from collections.abc import AsyncGenerator
+ from dataclasses import dataclass
  from datetime import UTC, datetime
  from pathlib import Path
  from typing import Any
@@ -34,6 +35,13 @@ _RAG_TEXTS: list[str] = []
  _RAG_INITIALIZED = False


+ @dataclass(frozen=True)
+ class _RagHit:
+     document: str
+     score: float
+     doc_id: int
+
+
  class ChatMessage(BaseModel):
      role: str
      content: str
@@ -315,14 +323,121 @@ async def _get_rag_retriever() -> tuple[Any | None, int]:
      if not _RAG_TEXTS:
          return None, 0

-     from evalvault.adapters.outbound.nlp.korean.bm25_retriever import KoreanBM25Retriever
-     from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
+     from evalvault.adapters.outbound.nlp.korean.toolkit import KoreanNLPToolkit
+
+     use_hybrid = os.getenv("EVALVAULT_RAG_USE_HYBRID", "true").lower() == "true"
+     embedding_profile = os.getenv("EVALVAULT_RAG_EMBEDDING_PROFILE", "dev")
+     vector_store = os.getenv("EVALVAULT_RAG_VECTOR_STORE", "pgvector").lower()
+     pgvector_index = os.getenv("EVALVAULT_RAG_PGVECTOR_INDEX", "hnsw").lower()
+     pgvector_index_lists = int(os.getenv("EVALVAULT_RAG_PGVECTOR_INDEX_LISTS", "100"))
+     pgvector_hnsw_m = int(os.getenv("EVALVAULT_RAG_PGVECTOR_HNSW_M", "16"))
+     pgvector_hnsw_ef = int(os.getenv("EVALVAULT_RAG_PGVECTOR_HNSW_EF_CONSTRUCTION", "64"))

-     tokenizer = KiwiTokenizer()
-     retriever = KoreanBM25Retriever(tokenizer=tokenizer)
-     retriever.index(list(_RAG_TEXTS))
-     if tokens and len(tokens) == len(_RAG_TEXTS):
-         retriever._tokenized_docs = tokens
+     def _build_conn_string() -> str | None:
+         try:
+             from evalvault.config.settings import Settings
+
+             settings = Settings()
+             if settings.postgres_connection_string:
+                 return settings.postgres_connection_string
+             if settings.postgres_host:
+                 return "host={host} port={port} dbname={dbname} user={user} password={password}".format(
+                     host=settings.postgres_host,
+                     port=settings.postgres_port,
+                     dbname=settings.postgres_database,
+                     user=settings.postgres_user or "postgres",
+                     password=settings.postgres_password or "",
+                 )
+         except Exception as exc:
+             logger.warning("Failed to build Postgres connection string: %s", exc)
+         return None
+
+     ollama_adapter = None
+     dense_retriever = None
+     embedding_func = None
+     if embedding_profile:
+         try:
+             from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
+             from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
+             from evalvault.config.settings import Settings
+
+             settings = Settings()
+             ollama_adapter = OllamaAdapter(settings)
+             dense_retriever = KoreanDenseRetriever(
+                 profile=embedding_profile,
+                 ollama_adapter=ollama_adapter,
+             )
+             embedding_func = dense_retriever.get_embedding_func()
+         except Exception as exc:  # pragma: no cover - runtime dependency
+             logger.warning("Failed to initialize dense retriever: %s", exc)
+
+     if vector_store == "pgvector" and embedding_func is not None:
+         conn_string = _build_conn_string()
+         if conn_string:
+             try:
+                 from evalvault.adapters.outbound.nlp.korean.bm25_retriever import (
+                     KoreanBM25Retriever,
+                 )
+                 from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
+                 from evalvault.adapters.outbound.retriever.pgvector_store import PgvectorStore
+
+                 store = PgvectorStore(
+                     conn_string,
+                     index_type=pgvector_index,
+                     index_lists=pgvector_index_lists,
+                     hnsw_m=pgvector_hnsw_m,
+                     hnsw_ef_construction=pgvector_hnsw_ef,
+                 )
+                 embedding_dim = (
+                     dense_retriever.dimension if dense_retriever else len(embedding_func(["x"])[0])
+                 )
+                 store.ensure_schema(dimension=embedding_dim)
+                 source_hash = _hash_text(content)
+                 existing_hash, existing_count = store.get_source_state(source="user_guide")
+                 if existing_hash != source_hash or existing_count != len(_RAG_TEXTS):
+                     embeddings = embedding_func(list(_RAG_TEXTS))
+                     store.replace_documents(
+                         source="user_guide",
+                         source_hash=source_hash,
+                         documents=list(_RAG_TEXTS),
+                         embeddings=embeddings,
+                     )
+
+                 tokenizer = KiwiTokenizer()
+                 bm25_retriever = KoreanBM25Retriever(tokenizer=tokenizer)
+                 bm25_retriever.index(list(_RAG_TEXTS))
+                 if tokens and len(tokens) == len(_RAG_TEXTS):
+                     bm25_retriever._tokenized_docs = tokens
+
+                 if use_hybrid:
+                     retriever = _PgvectorHybridRetriever(
+                         bm25_retriever=bm25_retriever,
+                         store=store,
+                         embedding_func=embedding_func,
+                         documents=list(_RAG_TEXTS),
+                     )
+                 else:
+                     retriever = _PgvectorDenseRetriever(
+                         store=store,
+                         embedding_func=embedding_func,
+                         documents=list(_RAG_TEXTS),
+                     )
+
+                 _RAG_RETRIEVER = retriever
+                 return retriever, _RAG_DOCS_COUNT
+             except Exception as exc:
+                 logger.warning("pgvector retriever setup failed: %s", exc)
+
+     toolkit = KoreanNLPToolkit()
+     retriever = toolkit.build_retriever(
+         list(_RAG_TEXTS),
+         use_hybrid=use_hybrid,
+         ollama_adapter=ollama_adapter,
+         embedding_profile=embedding_profile,
+         verbose=True,
+     )
+     if retriever is None:
+         return None, 0

      _RAG_RETRIEVER = retriever
      return retriever, _RAG_DOCS_COUNT
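
Note: the retriever selection in the hunk above is driven entirely by environment variables, with a fallback to the Korean NLP toolkit retriever when pgvector setup fails. A minimal sketch of the knobs and defaults it reads; the helper below is illustrative only and is not part of evalvault, while the names and default values are copied from the diff.

import os

RAG_ENV_DEFAULTS = {
    "EVALVAULT_RAG_USE_HYBRID": "true",
    "EVALVAULT_RAG_EMBEDDING_PROFILE": "dev",
    "EVALVAULT_RAG_VECTOR_STORE": "pgvector",
    "EVALVAULT_RAG_PGVECTOR_INDEX": "hnsw",
    "EVALVAULT_RAG_PGVECTOR_INDEX_LISTS": "100",
    "EVALVAULT_RAG_PGVECTOR_HNSW_M": "16",
    "EVALVAULT_RAG_PGVECTOR_HNSW_EF_CONSTRUCTION": "64",
}

def rag_env() -> dict[str, str]:
    # Effective values after applying the same defaults the adapter uses.
    return {name: os.getenv(name, default) for name, default in RAG_ENV_DEFAULTS.items()}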
@@ -384,11 +499,153 @@ def _simple_retrieve(texts: list[str], query: str, top_k: int) -> list[str]:
      return [text for _, text in scored[:top_k]]


+ def _rrf_fuse(
+     *,
+     bm25_results: list[Any],
+     dense_results: list[Any],
+     documents: list[str],
+     top_k: int,
+     bm25_weight: float = 0.4,
+     dense_weight: float = 0.6,
+     rrf_k: int = 60,
+ ) -> list[_RagHit]:
+     scores: dict[int, float] = {}
+
+     for rank, result in enumerate(bm25_results, 1):
+         doc_id = int(result.doc_id)
+         scores[doc_id] = scores.get(doc_id, 0.0) + (bm25_weight / (rrf_k + rank))
+
+     for rank, result in enumerate(dense_results, 1):
+         doc_id = int(result.doc_id)
+         scores[doc_id] = scores.get(doc_id, 0.0) + (dense_weight / (rrf_k + rank))
+
+     ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
+     hits: list[_RagHit] = []
+     for doc_id, score in ranked[:top_k]:
+         if 0 <= doc_id < len(documents):
+             hits.append(_RagHit(document=documents[doc_id], score=score, doc_id=doc_id))
+     return hits
+
+
+ class _PgvectorDenseRetriever:
+     def __init__(self, store: Any, embedding_func: Any, documents: list[str]) -> None:
+         self._store = store
+         self._embedding_func = embedding_func
+         self._documents = documents
+
+     def search(self, query: str, top_k: int = 5) -> list[_RagHit]:
+         query_embedding = self._embedding_func([query])[0]
+         results = self._store.search(
+             source="user_guide", query_embedding=query_embedding, top_k=top_k
+         )
+         hits: list[_RagHit] = []
+         for result in results:
+             if 0 <= result.doc_id < len(self._documents):
+                 hits.append(
+                     _RagHit(
+                         document=self._documents[result.doc_id],
+                         score=float(result.score),
+                         doc_id=result.doc_id,
+                     )
+                 )
+         return hits
+
+
+ class _PgvectorHybridRetriever:
+     def __init__(
+         self,
+         *,
+         bm25_retriever: Any,
+         store: Any,
+         embedding_func: Any,
+         documents: list[str],
+     ) -> None:
+         self._bm25 = bm25_retriever
+         self._store = store
+         self._embedding_func = embedding_func
+         self._documents = documents
+
+     def search(self, query: str, top_k: int = 5) -> list[_RagHit]:
+         bm25_results = self._bm25.search(query, top_k=len(self._documents))
+         query_embedding = self._embedding_func([query])[0]
+         dense_results = self._store.search(
+             source="user_guide", query_embedding=query_embedding, top_k=len(self._documents)
+         )
+         dense_results = sorted(dense_results, key=lambda item: item.score)
+         return _rrf_fuse(
+             bm25_results=bm25_results,
+             dense_results=dense_results,
+             documents=self._documents,
+             top_k=top_k,
+         )
+
+
+ def _read_text_limited(path: Path, limit: int = 4000) -> str | None:
+     try:
+         if not path.exists():
+             return None
+         content = path.read_text(encoding="utf-8", errors="ignore")
+     except Exception as exc:
+         logger.warning("Failed to read %s: %s", path, exc)
+         return None
+     content = content.strip()
+     if not content:
+         return None
+     if len(content) > limit:
+         return content[:limit] + "..."
+     return content
+
+
+ async def _build_run_context(run_id: str) -> list[str]:
+     contexts: list[str] = []
+     try:
+         summary_result = await _call_mcp_tool("get_run_summary", {"run_id": run_id})
+         payload = _extract_json_content(summary_result)
+         if isinstance(payload, dict):
+             contexts.append("[RUN 요약]\n" + _summarize_run_summary(payload))
+     except Exception as exc:
+         logger.warning("Failed to fetch run summary: %s", exc)
+
+     try:
+         artifacts_result = await _call_mcp_tool(
+             "get_artifacts", {"run_id": run_id, "kind": "analysis"}
+         )
+         payload = _extract_json_content(artifacts_result)
+         if isinstance(payload, dict):
+             contexts.append("[RUN 아티팩트]\n" + _summarize_artifacts(payload))
+             artifacts = payload.get("artifacts") or {}
+             report_path = artifacts.get("report_path")
+             if isinstance(report_path, str) and report_path:
+                 report_text = _read_text_limited(Path(report_path))
+                 if report_text:
+                     contexts.append("[REPORT 발췌]\n" + report_text)
+     except Exception as exc:
+         logger.warning("Failed to fetch run artifacts: %s", exc)
+
+     return contexts
+
+
  async def _rag_answer(
      user_text: str, run_id: str | None = None, category: str | None = None
  ) -> str | None:
-     retriever, _ = await _get_rag_retriever()
      contexts: list[str] = []
+     rag_llm_enabled = os.getenv("EVALVAULT_RAG_LLM_ENABLED", "true").lower() == "true"
+     run_context_enabled = os.getenv("EVALVAULT_CHAT_RUN_CONTEXT_ENABLED", "true").lower() == "true"
+
+     if run_id and rag_llm_enabled and run_context_enabled:
+         contexts.extend(await _build_run_context(run_id))
+
+     if not rag_llm_enabled and contexts:
+         return "\n\n".join(contexts[:3])
+
+     if not rag_llm_enabled:
+         content = _load_user_guide_text()
+         if content:
+             chunks = [chunk.strip() for chunk in content.split("\n\n") if chunk.strip()]
+             contexts.extend(_simple_retrieve(chunks, user_text, top_k=5))
+         return "\n\n".join(contexts[:3]) if contexts else None
+
+     retriever, _ = await _get_rag_retriever()

      if retriever is not None:
          results = retriever.search(user_text, top_k=5)
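
The _rrf_fuse helper added above is plain reciprocal rank fusion. A small worked example under the same defaults (bm25_weight=0.4, dense_weight=0.6, rrf_k=60); the doc ids and ranks are made up for illustration and the snippet is not evalvault code.

bm25_weight, dense_weight, rrf_k = 0.4, 0.6, 60
bm25_ranks = {2: 1, 7: 2}   # doc_id -> rank in the BM25 result list
dense_ranks = {7: 1, 2: 2}  # doc_id -> rank in the pgvector result list

scores: dict[int, float] = {}
for doc_id, rank in bm25_ranks.items():
    scores[doc_id] = scores.get(doc_id, 0.0) + bm25_weight / (rrf_k + rank)
for doc_id, rank in dense_ranks.items():
    scores[doc_id] = scores.get(doc_id, 0.0) + dense_weight / (rrf_k + rank)

# scores[2] = 0.4/61 + 0.6/62 ~= 0.01623
# scores[7] = 0.4/62 + 0.6/61 ~= 0.01629  -> doc 7 ranks first after fusion
print(sorted(scores.items(), key=lambda item: item[1], reverse=True))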
@@ -403,7 +660,7 @@ async def _rag_answer(
      if not contexts:
          return None

-     if os.getenv("EVALVAULT_RAG_LLM_ENABLED", "true").lower() != "true":
+     if not rag_llm_enabled:
          return "\n\n".join(contexts[:3])

      prompt = (
@@ -431,15 +688,24 @@ async def _rag_answer(
      if options:
          payload["options"] = options

-     async with httpx.AsyncClient(timeout=60) as client:
-         response = await client.post(
-             f"{os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')}/api/chat",
-             json=payload,
-         )
-         response.raise_for_status()
-         data = response.json()
+     fallback = "\n\n".join(contexts[:3])
+     chat_timeout = int(os.getenv("OLLAMA_CHAT_TIMEOUT_SECONDS", "180"))
+     try:
+         async with httpx.AsyncClient(timeout=chat_timeout) as client:
+             response = await client.post(
+                 f"{os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')}/api/chat",
+                 json=payload,
+             )
+             response.raise_for_status()
+             data = response.json()
+     except httpx.ReadTimeout:
+         logger.warning("Ollama chat timed out; returning retrieved contexts")
+         return fallback or None
+     except httpx.HTTPError as exc:
+         logger.warning("Ollama chat failed: %s", exc)
+         return fallback or None

-     return data.get("message", {}).get("content", "").strip() or None
+     return data.get("message", {}).get("content", "").strip() or fallback or None


  async def _call_mcp_tool(tool_name: str, tool_args: dict[str, Any]) -> Any:
@@ -665,6 +931,17 @@ async def _chat_stream(
      user_text: str, run_id: str | None = None, category: str | None = None
  ) -> AsyncGenerator[str, None]:
      started_at = time.perf_counter()
+     simple_mode = os.getenv("EVALVAULT_CHAT_SIMPLE_MODE", "false").lower() == "true"
+     run_context_enabled = os.getenv("EVALVAULT_CHAT_RUN_CONTEXT_ENABLED", "true").lower() == "true"
+     if simple_mode:
+         yield _event({"type": "status", "message": "간단 채팅 처리 중..."})
+         answer = await _direct_chat_answer(user_text)
+         if answer:
+             async for item in _emit_answer(answer):
+                 yield item
+         else:
+             yield _event({"type": "final", "content": "답변을 생성하지 못했습니다."})
+         return
      if category in {"result_interpretation", "improvement_direction"} and not run_id:
          yield _event(
              {
@@ -700,6 +977,7 @@ async def _chat_stream(
          _is_verb_only(user_text)
          and category in {"result_interpretation", "improvement_direction"}
          and run_id
+         and run_context_enabled
      ):
          yield _event({"type": "status", "message": "선택한 run 요약 중..."})
          try:
@@ -807,6 +1085,14 @@ async def _chat_stream(
      if tool_name == "get_artifacts" and not (tool_args.get("run_id") or run_id):
          yield _event({"type": "final", "content": "아티팩트 조회를 위해 run_id가 필요합니다."})
          return
+     if not run_context_enabled and tool_name in {"get_run_summary", "get_artifacts"}:
+         yield _event(
+             {
+                 "type": "final",
+                 "content": "run 요약/아티팩트 조회가 비활성화되어 있습니다.",
+             }
+         )
+         return
      if tool_name == "analyze_compare" and (
          not tool_args.get("run_id_a") or not tool_args.get("run_id_b")
      ):
@@ -71,7 +71,9 @@ class ConfigUpdateRequest(BaseModel):
      phoenix_endpoint: str | None = None
      phoenix_enabled: bool | None = None
      phoenix_sample_rate: float | None = None
-     tracker_provider: Literal["langfuse", "mlflow", "phoenix", "none"] | None = None
+     phoenix_project_name: str | None = None
+     phoenix_annotations_enabled: bool | None = None
+     tracker_provider: str | None = None
      postgres_host: str | None = None
      postgres_port: int | None = None
      postgres_database: str | None = None
@@ -5,17 +5,22 @@ from __future__ import annotations
  from fastapi import APIRouter, HTTPException
  from pydantic import BaseModel

- from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
+ from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
  from evalvault.config.settings import get_settings
+ from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort

  router = APIRouter()
- DEFAULT_MEMORY_DB_PATH = get_settings().evalvault_memory_db_path
+ _settings = get_settings()
+ DEFAULT_MEMORY_DB_PATH = (
+     _settings.evalvault_memory_db_path if _settings.db_backend == "sqlite" else None
+ )


- # --- Dependencies ---
- def get_memory_adapter(db_path: str = DEFAULT_MEMORY_DB_PATH) -> SQLiteDomainMemoryAdapter:
+ def get_memory_adapter(db_path: str | None = DEFAULT_MEMORY_DB_PATH) -> DomainMemoryPort:
      """Get memory adapter instance."""
-     return SQLiteDomainMemoryAdapter(db_path)
+     from pathlib import Path
+
+     return build_domain_memory_adapter(db_path=Path(db_path) if db_path else None)


  # --- Pydantic Models ---
@@ -8,7 +8,7 @@ from fastapi.encoders import jsonable_encoder
  from pydantic import BaseModel

  from evalvault.adapters.outbound.llm import get_llm_adapter
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
  from evalvault.config.settings import get_settings
  from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
  from evalvault.domain.metrics.analysis_registry import list_analysis_metric_specs
@@ -264,9 +264,9 @@ def _intent_label(intent_value: str) -> str:
      return meta["label"] if meta else intent.value


- def _build_pipeline_service() -> tuple[AnalysisPipelineService, SQLiteStorageAdapter]:
+ def _build_pipeline_service() -> tuple[AnalysisPipelineService, Any]:
      settings = get_settings()
-     storage = SQLiteStorageAdapter(db_path=settings.evalvault_db_path)
+     storage = build_storage_adapter(settings=settings)
      llm_adapter = None
      try:
          llm_adapter = get_llm_adapter(settings)
@@ -21,7 +21,7 @@ from evalvault.adapters.outbound.dataset.templates import (
      render_dataset_template_xlsx,
  )
  from evalvault.adapters.outbound.debug.report_renderer import render_markdown
- from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
+ from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
  from evalvault.adapters.outbound.report import DashboardGenerator
  from evalvault.config.settings import get_settings
  from evalvault.domain.entities import (
@@ -64,6 +64,7 @@ class RunSummaryResponse(BaseModel):
      phoenix_precision: float | None = None
      phoenix_drift: float | None = None
      phoenix_experiment_url: str | None = None
+     feedback_count: int | None = None

      model_config = {"from_attributes": True}

@@ -908,11 +909,20 @@ async def start_evaluation_endpoint(
      )

      try:
+         from pathlib import Path
+
          settings = get_settings()
-         memory_db = memory_config.get("db_path") or settings.evalvault_memory_db_path
+         if memory_config.get("db_path"):
+             memory_db = memory_config.get("db_path")
+         elif settings.db_backend == "sqlite":
+             memory_db = settings.evalvault_memory_db_path
+         else:
+             memory_db = None
          domain = memory_config.get("domain") or "default"
          language = memory_config.get("language") or "ko"
-         memory_adapter = SQLiteDomainMemoryAdapter(memory_db)
+         memory_adapter = build_domain_memory_adapter(
+             settings=settings, db_path=Path(memory_db) if memory_db else None
+         )
          hook = DomainLearningHook(memory_adapter)
          await hook.on_evaluation_complete(
              evaluation_run=result,
@@ -944,14 +954,22 @@
  def list_runs(
      adapter: AdapterDep,
      limit: int = 50,
+     offset: int = Query(0, ge=0, description="Pagination offset"),
      dataset_name: str | None = Query(None, description="Filter by dataset name"),
      model_name: str | None = Query(None, description="Filter by model name"),
+     include_feedback: bool = Query(False, description="Include feedback count"),
  ) -> list[Any]:
      """List evaluation runs."""
      from evalvault.ports.inbound.web_port import RunFilters

      filters = RunFilters(dataset_name=dataset_name, model_name=model_name)
-     summaries = adapter.list_runs(limit=limit, filters=filters)
+     summaries = adapter.list_runs(limit=limit, offset=offset, filters=filters)
+     feedback_counts: dict[str, int] = {}
+     if include_feedback:
+         feedback_counts = {
+             summary.run_id: adapter.get_feedback_summary(summary.run_id).total_feedback
+             for summary in summaries
+         }

      # Convert RunSummary dataclass to dict/Pydantic compatible format
      # The adapter returns RunSummary objects which matches our response model mostly
@@ -975,6 +993,7 @@ def list_runs(
              "phoenix_precision": s.phoenix_precision,
              "phoenix_drift": s.phoenix_drift,
              "phoenix_experiment_url": s.phoenix_experiment_url,
+             "feedback_count": feedback_counts.get(s.run_id) if include_feedback else None,
          }
          for s in summaries
      ]
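
A hypothetical client call exercising the new list_runs query parameters: only limit, offset, and include_feedback come from the diff; the host and route prefix are assumptions about how the API router is mounted.

import httpx

response = httpx.get(
    "http://localhost:8000/api/runs",
    params={"limit": 20, "offset": 40, "include_feedback": True},
)
response.raise_for_status()
for run in response.json():
    # feedback_count is the new optional field on RunSummaryResponse.
    print(run.get("feedback_count"))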
@@ -26,7 +26,8 @@ from evalvault.adapters.outbound.analysis.pipeline_helpers import to_serializabl
  from evalvault.adapters.outbound.cache import MemoryCacheAdapter
  from evalvault.adapters.outbound.llm import get_llm_adapter
  from evalvault.adapters.outbound.report import DashboardGenerator, MarkdownReportAdapter
- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
+ from evalvault.adapters.outbound.storage.postgres_adapter import PostgreSQLStorageAdapter
  from evalvault.config.phoenix_support import get_phoenix_trace_url
  from evalvault.config.settings import Settings, apply_profile
  from evalvault.domain.entities import EvaluationRun
@@ -115,11 +116,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
      ) -> None:
          """평가 실행 결과를 분석하고 통계 인사이트를 표시합니다."""

-         resolved_db_path = db_path or Settings().evalvault_db_path
-         if resolved_db_path is None:
-             _console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
-             raise typer.Exit(1)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)

          try:
              run = storage.get_run(run_id)
@@ -217,7 +214,12 @@
              _save_analysis_payload(bundle.causal, "causal")
          if improvement_report is not None:
              _save_analysis_payload(improvement_report, "playbook")
-         _console.print(f"\n[green]분석 결과 DB 저장: {resolved_db_path}[/green]")
+         storage_label = (
+             "PostgreSQL"
+             if isinstance(storage, PostgreSQLStorageAdapter)
+             else f"SQLite ({db_path})"
+         )
+         _console.print(f"\n[green]분석 결과 DB 저장: {storage_label}[/green]")

          if dashboard:
              dashboard_gen = DashboardGenerator()
@@ -359,11 +361,7 @@
      ) -> None:
          """두 실행을 통계적으로 비교합니다."""

-         resolved_db_path = db_path or Settings().evalvault_db_path
-         if resolved_db_path is None:
-             _console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
-             raise typer.Exit(1)
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)

          try:
              run_a = storage.get_run(run_id1)
@@ -385,7 +385,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
          """
          try:
              from evalvault.adapters.outbound.benchmark import LMEvalAdapter
-             from evalvault.adapters.outbound.storage import SQLiteStorageAdapter
+             from evalvault.adapters.outbound.storage.factory import build_storage_adapter
              from evalvault.config.settings import get_settings
              from evalvault.domain.services.benchmark_service import BenchmarkService
              from evalvault.ports.outbound.benchmark_port import BenchmarkBackend
@@ -426,7 +426,7 @@
          ensure_phoenix_instrumentation(settings, console=console, force=True)

          benchmark_adapter = LMEvalAdapter(settings=settings)
-         storage_adapter = SQLiteStorageAdapter(db_path=db)
+         storage_adapter = build_storage_adapter(settings=settings, db_path=db)
          tracer_adapter = _create_tracer_adapter(phoenix)
          service = BenchmarkService(
              benchmark_adapter=benchmark_adapter,
@@ -556,9 +556,11 @@
          ),
      ) -> None:
          """View past benchmark runs."""
-         from evalvault.adapters.outbound.storage import SQLiteStorageAdapter
+         from evalvault.adapters.outbound.storage.factory import build_storage_adapter
+         from evalvault.config.settings import get_settings

-         storage = SQLiteStorageAdapter(db_path=db)
+         settings = get_settings()
+         storage = build_storage_adapter(settings=settings, db_path=db)
          runs = storage.list_benchmark_runs(
              benchmark_type=benchmark_type,
              model_name=model_name,
@@ -629,7 +631,7 @@
              evalvault benchmark report abc123
              evalvault benchmark report abc123 -o report.md -p dev
          """
-         from evalvault.adapters.outbound.storage import SQLiteStorageAdapter
+         from evalvault.adapters.outbound.storage.factory import build_storage_adapter
          from evalvault.config.settings import get_settings
          from evalvault.domain.services.benchmark_report_service import (
              BenchmarkReportService,
@@ -639,7 +641,7 @@
          if profile:
              settings.profile = profile

-         storage = SQLiteStorageAdapter(db_path=db)
+         storage = build_storage_adapter(settings=settings, db_path=db)
          benchmark_run = storage.get_benchmark_run(run_id)

          if not benchmark_run:
@@ -717,7 +719,7 @@
              evalvault benchmark compare abc123 def456
              evalvault benchmark compare abc123 def456 -o comparison.md
          """
-         from evalvault.adapters.outbound.storage import SQLiteStorageAdapter
+         from evalvault.adapters.outbound.storage.factory import build_storage_adapter
          from evalvault.config.settings import get_settings
          from evalvault.domain.services.benchmark_report_service import (
              BenchmarkReportService,
@@ -727,7 +729,7 @@
          if profile:
              settings.profile = profile

-         storage = SQLiteStorageAdapter(db_path=db)
+         storage = build_storage_adapter(settings=settings, db_path=db)
          baseline = storage.get_benchmark_run(baseline_id)
          target = storage.get_benchmark_run(target_id)

@@ -7,7 +7,7 @@ import typer
  from rich.console import Console
  from rich.table import Table

- from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+ from evalvault.adapters.outbound.storage.factory import build_storage_adapter
  from evalvault.config.settings import Settings
  from evalvault.domain.services.satisfaction_calibration_service import (
      SatisfactionCalibrationService,
@@ -36,12 +36,7 @@ def register_calibrate_commands(app: typer.Typer, console: Console) -> None:
          ),
          db_path: Path | None = db_option(help_text="DB 경로"),
      ) -> None:
-         resolved_db_path = db_path or Settings().evalvault_db_path
-         if resolved_db_path is None:
-             _console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
-             raise typer.Exit(1)
-
-         storage = SQLiteStorageAdapter(db_path=resolved_db_path)
+         storage = build_storage_adapter(settings=Settings(), db_path=db_path)
          try:
              run = storage.get_run(run_id)
          except KeyError:
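
The CLI hunks above all replace direct SQLiteStorageAdapter construction with the new storage factory (evalvault/adapters/outbound/storage/factory.py, added in 1.76.0). A rough sketch of the shared pattern, assuming build_storage_adapter falls back to Settings (SQLite path or PostgreSQL connection) when db_path is None; the exact resolution rules live in the factory module itself.

from evalvault.adapters.outbound.storage.factory import build_storage_adapter
from evalvault.config.settings import Settings

storage = build_storage_adapter(settings=Settings(), db_path=None)
run = storage.get_run("some-run-id")  # raises KeyError when the run is missing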