evalvault 1.74.0__py3-none-any.whl → 1.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. evalvault/adapters/inbound/api/adapter.py +28 -17
  2. evalvault/adapters/inbound/api/routers/calibration.py +9 -9
  3. evalvault/adapters/inbound/api/routers/chat.py +303 -17
  4. evalvault/adapters/inbound/api/routers/domain.py +10 -5
  5. evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
  6. evalvault/adapters/inbound/api/routers/runs.py +23 -4
  7. evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
  8. evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
  9. evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
  10. evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
  11. evalvault/adapters/inbound/cli/commands/compare.py +2 -7
  12. evalvault/adapters/inbound/cli/commands/debug.py +3 -2
  13. evalvault/adapters/inbound/cli/commands/domain.py +12 -12
  14. evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
  15. evalvault/adapters/inbound/cli/commands/gate.py +3 -2
  16. evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
  17. evalvault/adapters/inbound/cli/commands/history.py +3 -12
  18. evalvault/adapters/inbound/cli/commands/method.py +1 -2
  19. evalvault/adapters/inbound/cli/commands/ops.py +2 -2
  20. evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
  21. evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
  22. evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
  23. evalvault/adapters/inbound/cli/commands/regress.py +5 -4
  24. evalvault/adapters/inbound/cli/commands/run.py +42 -31
  25. evalvault/adapters/inbound/cli/commands/run_helpers.py +24 -15
  26. evalvault/adapters/inbound/cli/commands/stage.py +6 -25
  27. evalvault/adapters/inbound/cli/utils/options.py +10 -4
  28. evalvault/adapters/inbound/mcp/tools.py +11 -8
  29. evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
  30. evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
  31. evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
  32. evalvault/adapters/outbound/domain_memory/factory.py +68 -0
  33. evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
  34. evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
  35. evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
  36. evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
  37. evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
  38. evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
  39. evalvault/adapters/outbound/storage/base_sql.py +3 -2
  40. evalvault/adapters/outbound/storage/factory.py +53 -0
  41. evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
  42. evalvault/config/settings.py +31 -7
  43. evalvault/domain/services/domain_learning_hook.py +2 -1
  44. evalvault/ports/inbound/web_port.py +3 -1
  45. evalvault/ports/outbound/storage_port.py +2 -0
  46. evalvault-1.75.0.dist-info/METADATA +221 -0
  47. {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/RECORD +50 -45
  48. evalvault-1.74.0.dist-info/METADATA +0 -585
  49. {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/WHEEL +0 -0
  50. {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/entry_points.txt +0 -0
  51. {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/licenses/LICENSE.md +0 -0
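
The recurring change in this release is that the inbound adapters stop constructing SQLite-specific classes directly and instead go through backend-selecting factories (build_storage_adapter, build_domain_memory_adapter). A minimal sketch of the new wiring, pieced together from the hunks below; the standalone script framing is illustrative, not code shipped in the package:

from pathlib import Path

from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
from evalvault.config.settings import get_settings

settings = get_settings()

# The factory picks the storage backend (e.g. SQLite vs. Postgres) from settings.
storage = build_storage_adapter(settings=settings)

# Domain memory follows the same pattern; an explicit db_path only makes sense
# for the SQLite backend, so it is passed conditionally, mirroring the hunks below.
memory_db = settings.evalvault_memory_db_path if settings.db_backend == "sqlite" else None
memory_adapter = build_domain_memory_adapter(
    settings=settings,
    db_path=Path(memory_db) if memory_db else None,
)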
evalvault/adapters/inbound/api/adapter.py
@@ -127,14 +127,15 @@ class WebUIAdapter:
             llm_adapter: LLM 어댑터 (선택적)
             data_loader: 데이터 로더 (선택적)
         """
-        resolved_settings = settings
+        resolved_settings = settings or Settings()
         if storage is None:
-            resolved_settings = settings or Settings()
-            db_path = getattr(resolved_settings, "evalvault_db_path", None)
-            if db_path:
-                from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+            from evalvault.adapters.outbound.storage.factory import build_storage_adapter
 
-                storage = SQLiteStorageAdapter(db_path=db_path)
+            try:
+                storage = build_storage_adapter(settings=resolved_settings)
+            except Exception as exc:
+                logger.warning("Storage initialization failed: %s", exc)
+                storage = None
 
         self._storage = storage
         self._evaluator = evaluator
@@ -450,7 +451,12 @@ class WebUIAdapter:
         memory_domain = memory_config.get("domain") or dataset.metadata.get("domain") or "default"
         memory_language = memory_config.get("language") or "ko"
         memory_augment = bool(memory_config.get("augment_context"))
-        memory_db_path = memory_config.get("db_path") or settings.evalvault_memory_db_path
+        if memory_config.get("db_path"):
+            memory_db_path = memory_config.get("db_path")
+        elif settings.db_backend == "sqlite":
+            memory_db_path = settings.evalvault_memory_db_path
+        else:
+            memory_db_path = None
         memory_evaluator = None
         requested_thresholds = request.thresholds or {}
         if request.threshold_profile or requested_thresholds:
@@ -472,16 +478,17 @@ class WebUIAdapter:
         memory_active = False
         if memory_enabled:
            try:
-                from evalvault.adapters.outbound.domain_memory.sqlite_adapter import (
-                    SQLiteDomainMemoryAdapter,
-                )
+                from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
                from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import (
                    PhoenixTracerAdapter,
                )
                from evalvault.domain.services.memory_aware_evaluator import MemoryAwareEvaluator
 
                tracer = PhoenixTracerAdapter() if tracker_provider == "phoenix" else None
-                memory_adapter = SQLiteDomainMemoryAdapter(memory_db_path)
+                memory_adapter = build_domain_memory_adapter(
+                    settings=self._settings,
+                    db_path=Path(memory_db_path) if memory_db_path else None,
+                )
                memory_evaluator = MemoryAwareEvaluator(
                    evaluator=self._evaluator,
                    memory_port=memory_adapter,
@@ -814,6 +821,7 @@ class WebUIAdapter:
     def list_runs(
         self,
         limit: int = 50,
+        offset: int = 0,
         filters: RunFilters | None = None,
     ) -> list[RunSummary]:
         """평가 목록 조회.
@@ -833,7 +841,7 @@ class WebUIAdapter:
 
         try:
             # 저장소에서 평가 목록 조회
-            runs = self._storage.list_runs(limit=limit)
+            runs = self._storage.list_runs(limit=limit, offset=offset)
 
             # RunSummary로 변환
             summaries = []
@@ -1029,7 +1037,11 @@ class WebUIAdapter:
         run = self.get_run_details(run_id)
         feedbacks = storage.list_feedback(run_id)
         if labels_source in {"feedback", "hybrid"} and not feedbacks:
-            raise ValueError("Feedback labels are required for this labels_source")
+            raise ValueError(
+                f"No feedback labels found for run '{run_id}'. "
+                f"Calibration with labels_source='{labels_source}' requires at least one feedback label. "
+                "Please add feedback labels via the UI or API, or use labels_source='gold' if gold labels are available."
+            )
         resolved_metrics = metrics or list(run.metrics_evaluated)
         if not resolved_metrics:
             raise ValueError("No metrics available for calibration")
@@ -2198,16 +2210,15 @@ def create_adapter() -> WebUIAdapter:
     """
     from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
     from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
-    from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+    from evalvault.adapters.outbound.storage.factory import build_storage_adapter
     from evalvault.config.settings import get_settings
     from evalvault.domain.services.evaluator import RagasEvaluator
 
     # 설정 로드
     settings = get_settings()
 
-    # Storage 생성 (기본 SQLite)
-    db_path = Path(settings.evalvault_db_path)
-    storage = SQLiteStorageAdapter(db_path=db_path)
+    # Storage 생성
+    storage = build_storage_adapter(settings=settings)
 
     # LLM adapter 생성 (API 키 없으면 None)
     llm_adapter = None
evalvault/adapters/inbound/api/routers/calibration.py
@@ -113,6 +113,15 @@ def run_judge_calibration(
     return JudgeCalibrationResponse.model_validate(payload)
 
 
+@router.get("/judge/history", response_model=list[JudgeCalibrationHistoryItem])
+def list_calibrations(
+    adapter: AdapterDep,
+    limit: int = Query(20, ge=1, le=200),
+) -> list[JudgeCalibrationHistoryItem]:
+    entries = adapter.list_judge_calibrations(limit=limit)
+    return [JudgeCalibrationHistoryItem.model_validate(entry) for entry in entries]
+
+
 @router.get("/judge/{calibration_id}", response_model=JudgeCalibrationResponse)
 def get_calibration_result(calibration_id: str, adapter: AdapterDep) -> JudgeCalibrationResponse:
     try:
@@ -122,12 +131,3 @@ def get_calibration_result(calibration_id: str, adapter: AdapterDep) -> JudgeCal
     except RuntimeError as exc:
         raise HTTPException(status_code=500, detail=str(exc)) from exc
     return JudgeCalibrationResponse.model_validate(payload)
-
-
-@router.get("/judge/history", response_model=list[JudgeCalibrationHistoryItem])
-def list_calibrations(
-    adapter: AdapterDep,
-    limit: int = Query(20, ge=1, le=200),
-) -> list[JudgeCalibrationHistoryItem]:
-    entries = adapter.list_judge_calibrations(limit=limit)
-    return [JudgeCalibrationHistoryItem.model_validate(entry) for entry in entries]
evalvault/adapters/inbound/api/routers/chat.py
@@ -8,6 +8,7 @@ import os
 import re
 import time
 from collections.abc import AsyncGenerator
+from dataclasses import dataclass
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
@@ -34,6 +35,13 @@ _RAG_TEXTS: list[str] = []
 _RAG_INITIALIZED = False
 
 
+@dataclass(frozen=True)
+class _RagHit:
+    document: str
+    score: float
+    doc_id: int
+
+
 class ChatMessage(BaseModel):
     role: str
     content: str
@@ -315,14 +323,121 @@ async def _get_rag_retriever() -> tuple[Any | None, int]:
     if not _RAG_TEXTS:
         return None, 0
 
-    from evalvault.adapters.outbound.nlp.korean.bm25_retriever import KoreanBM25Retriever
-    from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
+    from evalvault.adapters.outbound.nlp.korean.toolkit import KoreanNLPToolkit
+
+    use_hybrid = os.getenv("EVALVAULT_RAG_USE_HYBRID", "true").lower() == "true"
+    embedding_profile = os.getenv("EVALVAULT_RAG_EMBEDDING_PROFILE", "dev")
+    vector_store = os.getenv("EVALVAULT_RAG_VECTOR_STORE", "pgvector").lower()
+    pgvector_index = os.getenv("EVALVAULT_RAG_PGVECTOR_INDEX", "hnsw").lower()
+    pgvector_index_lists = int(os.getenv("EVALVAULT_RAG_PGVECTOR_INDEX_LISTS", "100"))
+    pgvector_hnsw_m = int(os.getenv("EVALVAULT_RAG_PGVECTOR_HNSW_M", "16"))
+    pgvector_hnsw_ef = int(os.getenv("EVALVAULT_RAG_PGVECTOR_HNSW_EF_CONSTRUCTION", "64"))
 
-    tokenizer = KiwiTokenizer()
-    retriever = KoreanBM25Retriever(tokenizer=tokenizer)
-    retriever.index(list(_RAG_TEXTS))
-    if tokens and len(tokens) == len(_RAG_TEXTS):
-        retriever._tokenized_docs = tokens
+    def _build_conn_string() -> str | None:
+        try:
+            from evalvault.config.settings import Settings
+
+            settings = Settings()
+            if settings.postgres_connection_string:
+                return settings.postgres_connection_string
+            if settings.postgres_host:
+                return "host={host} port={port} dbname={dbname} user={user} password={password}".format(
+                    host=settings.postgres_host,
+                    port=settings.postgres_port,
+                    dbname=settings.postgres_database,
+                    user=settings.postgres_user or "postgres",
+                    password=settings.postgres_password or "",
+                )
+        except Exception as exc:
+            logger.warning("Failed to build Postgres connection string: %s", exc)
+        return None
+
+    ollama_adapter = None
+    dense_retriever = None
+    embedding_func = None
+    if embedding_profile:
+        try:
+            from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
+            from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
+            from evalvault.config.settings import Settings
+
+            settings = Settings()
+            ollama_adapter = OllamaAdapter(settings)
+            dense_retriever = KoreanDenseRetriever(
+                profile=embedding_profile,
+                ollama_adapter=ollama_adapter,
+            )
+            embedding_func = dense_retriever.get_embedding_func()
+        except Exception as exc:  # pragma: no cover - runtime dependency
+            logger.warning("Failed to initialize dense retriever: %s", exc)
+
+    if vector_store == "pgvector" and embedding_func is not None:
+        conn_string = _build_conn_string()
+        if conn_string:
+            try:
+                from evalvault.adapters.outbound.nlp.korean.bm25_retriever import (
+                    KoreanBM25Retriever,
+                )
+                from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
+                from evalvault.adapters.outbound.retriever.pgvector_store import PgvectorStore
+
+                store = PgvectorStore(
+                    conn_string,
+                    index_type=pgvector_index,
+                    index_lists=pgvector_index_lists,
+                    hnsw_m=pgvector_hnsw_m,
+                    hnsw_ef_construction=pgvector_hnsw_ef,
+                )
+                embedding_dim = (
+                    dense_retriever.dimension if dense_retriever else len(embedding_func(["x"])[0])
+                )
+                store.ensure_schema(dimension=embedding_dim)
+                source_hash = _hash_text(content)
+                existing_hash, existing_count = store.get_source_state(source="user_guide")
+                if existing_hash != source_hash or existing_count != len(_RAG_TEXTS):
+                    embeddings = embedding_func(list(_RAG_TEXTS))
+                    store.replace_documents(
+                        source="user_guide",
+                        source_hash=source_hash,
+                        documents=list(_RAG_TEXTS),
+                        embeddings=embeddings,
+                    )
+
+                tokenizer = KiwiTokenizer()
+                bm25_retriever = KoreanBM25Retriever(tokenizer=tokenizer)
+                bm25_retriever.index(list(_RAG_TEXTS))
+                if tokens and len(tokens) == len(_RAG_TEXTS):
+                    bm25_retriever._tokenized_docs = tokens
+
+                if use_hybrid:
+                    retriever = _PgvectorHybridRetriever(
+                        bm25_retriever=bm25_retriever,
+                        store=store,
+                        embedding_func=embedding_func,
+                        documents=list(_RAG_TEXTS),
+                    )
+                else:
+                    retriever = _PgvectorDenseRetriever(
+                        store=store,
+                        embedding_func=embedding_func,
+                        documents=list(_RAG_TEXTS),
+                    )
+
+                _RAG_RETRIEVER = retriever
+                return retriever, _RAG_DOCS_COUNT
+            except Exception as exc:
+                logger.warning("pgvector retriever setup failed: %s", exc)
+
+    toolkit = KoreanNLPToolkit()
+    retriever = toolkit.build_retriever(
+        list(_RAG_TEXTS),
+        use_hybrid=use_hybrid,
+        ollama_adapter=ollama_adapter,
+        embedding_profile=embedding_profile,
+        verbose=True,
+    )
+    if retriever is None:
+        return None, 0
 
     _RAG_RETRIEVER = retriever
     return retriever, _RAG_DOCS_COUNT
@@ -384,11 +499,153 @@ def _simple_retrieve(texts: list[str], query: str, top_k: int) -> list[str]:
     return [text for _, text in scored[:top_k]]
 
 
+def _rrf_fuse(
+    *,
+    bm25_results: list[Any],
+    dense_results: list[Any],
+    documents: list[str],
+    top_k: int,
+    bm25_weight: float = 0.4,
+    dense_weight: float = 0.6,
+    rrf_k: int = 60,
+) -> list[_RagHit]:
+    scores: dict[int, float] = {}
+
+    for rank, result in enumerate(bm25_results, 1):
+        doc_id = int(result.doc_id)
+        scores[doc_id] = scores.get(doc_id, 0.0) + (bm25_weight / (rrf_k + rank))
+
+    for rank, result in enumerate(dense_results, 1):
+        doc_id = int(result.doc_id)
+        scores[doc_id] = scores.get(doc_id, 0.0) + (dense_weight / (rrf_k + rank))
+
+    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
+    hits: list[_RagHit] = []
+    for doc_id, score in ranked[:top_k]:
+        if 0 <= doc_id < len(documents):
+            hits.append(_RagHit(document=documents[doc_id], score=score, doc_id=doc_id))
+    return hits
+
+
+class _PgvectorDenseRetriever:
+    def __init__(self, store: Any, embedding_func: Any, documents: list[str]) -> None:
+        self._store = store
+        self._embedding_func = embedding_func
+        self._documents = documents
+
+    def search(self, query: str, top_k: int = 5) -> list[_RagHit]:
+        query_embedding = self._embedding_func([query])[0]
+        results = self._store.search(
+            source="user_guide", query_embedding=query_embedding, top_k=top_k
+        )
+        hits: list[_RagHit] = []
+        for result in results:
+            if 0 <= result.doc_id < len(self._documents):
+                hits.append(
+                    _RagHit(
+                        document=self._documents[result.doc_id],
+                        score=float(result.score),
+                        doc_id=result.doc_id,
+                    )
+                )
+        return hits
+
+
+class _PgvectorHybridRetriever:
+    def __init__(
+        self,
+        *,
+        bm25_retriever: Any,
+        store: Any,
+        embedding_func: Any,
+        documents: list[str],
+    ) -> None:
+        self._bm25 = bm25_retriever
+        self._store = store
+        self._embedding_func = embedding_func
+        self._documents = documents
+
+    def search(self, query: str, top_k: int = 5) -> list[_RagHit]:
+        bm25_results = self._bm25.search(query, top_k=len(self._documents))
+        query_embedding = self._embedding_func([query])[0]
+        dense_results = self._store.search(
+            source="user_guide", query_embedding=query_embedding, top_k=len(self._documents)
+        )
+        dense_results = sorted(dense_results, key=lambda item: item.score)
+        return _rrf_fuse(
+            bm25_results=bm25_results,
+            dense_results=dense_results,
+            documents=self._documents,
+            top_k=top_k,
+        )
+
+
+def _read_text_limited(path: Path, limit: int = 4000) -> str | None:
+    try:
+        if not path.exists():
+            return None
+        content = path.read_text(encoding="utf-8", errors="ignore")
+    except Exception as exc:
+        logger.warning("Failed to read %s: %s", path, exc)
+        return None
+    content = content.strip()
+    if not content:
+        return None
+    if len(content) > limit:
+        return content[:limit] + "..."
+    return content
+
+
+async def _build_run_context(run_id: str) -> list[str]:
+    contexts: list[str] = []
+    try:
+        summary_result = await _call_mcp_tool("get_run_summary", {"run_id": run_id})
+        payload = _extract_json_content(summary_result)
+        if isinstance(payload, dict):
+            contexts.append("[RUN 요약]\n" + _summarize_run_summary(payload))
+    except Exception as exc:
+        logger.warning("Failed to fetch run summary: %s", exc)
+
+    try:
+        artifacts_result = await _call_mcp_tool(
+            "get_artifacts", {"run_id": run_id, "kind": "analysis"}
+        )
+        payload = _extract_json_content(artifacts_result)
+        if isinstance(payload, dict):
+            contexts.append("[RUN 아티팩트]\n" + _summarize_artifacts(payload))
+            artifacts = payload.get("artifacts") or {}
+            report_path = artifacts.get("report_path")
+            if isinstance(report_path, str) and report_path:
+                report_text = _read_text_limited(Path(report_path))
+                if report_text:
+                    contexts.append("[REPORT 발췌]\n" + report_text)
+    except Exception as exc:
+        logger.warning("Failed to fetch run artifacts: %s", exc)
+
+    return contexts
+
+
 async def _rag_answer(
     user_text: str, run_id: str | None = None, category: str | None = None
 ) -> str | None:
-    retriever, _ = await _get_rag_retriever()
     contexts: list[str] = []
+    rag_llm_enabled = os.getenv("EVALVAULT_RAG_LLM_ENABLED", "true").lower() == "true"
+    run_context_enabled = os.getenv("EVALVAULT_CHAT_RUN_CONTEXT_ENABLED", "true").lower() == "true"
+
+    if run_id and rag_llm_enabled and run_context_enabled:
+        contexts.extend(await _build_run_context(run_id))
+
+    if not rag_llm_enabled and contexts:
+        return "\n\n".join(contexts[:3])
+
+    if not rag_llm_enabled:
+        content = _load_user_guide_text()
+        if content:
+            chunks = [chunk.strip() for chunk in content.split("\n\n") if chunk.strip()]
+            contexts.extend(_simple_retrieve(chunks, user_text, top_k=5))
+        return "\n\n".join(contexts[:3]) if contexts else None
+
+    retriever, _ = await _get_rag_retriever()
 
     if retriever is not None:
         results = retriever.search(user_text, top_k=5)
@@ -403,7 +660,7 @@ async def _rag_answer(
     if not contexts:
         return None
 
-    if os.getenv("EVALVAULT_RAG_LLM_ENABLED", "true").lower() != "true":
+    if not rag_llm_enabled:
         return "\n\n".join(contexts[:3])
 
     prompt = (
@@ -431,15 +688,24 @@ async def _rag_answer(
     if options:
         payload["options"] = options
 
-    async with httpx.AsyncClient(timeout=60) as client:
-        response = await client.post(
-            f"{os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')}/api/chat",
-            json=payload,
-        )
-        response.raise_for_status()
-        data = response.json()
+    fallback = "\n\n".join(contexts[:3])
+    chat_timeout = int(os.getenv("OLLAMA_CHAT_TIMEOUT_SECONDS", "180"))
+    try:
+        async with httpx.AsyncClient(timeout=chat_timeout) as client:
+            response = await client.post(
+                f"{os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')}/api/chat",
+                json=payload,
+            )
+            response.raise_for_status()
+            data = response.json()
+    except httpx.ReadTimeout:
+        logger.warning("Ollama chat timed out; returning retrieved contexts")
+        return fallback or None
+    except httpx.HTTPError as exc:
+        logger.warning("Ollama chat failed: %s", exc)
+        return fallback or None
 
-    return data.get("message", {}).get("content", "").strip() or None
+    return data.get("message", {}).get("content", "").strip() or fallback or None
 
 
 async def _call_mcp_tool(tool_name: str, tool_args: dict[str, Any]) -> Any:
@@ -665,6 +931,17 @@ async def _chat_stream(
     user_text: str, run_id: str | None = None, category: str | None = None
 ) -> AsyncGenerator[str, None]:
     started_at = time.perf_counter()
+    simple_mode = os.getenv("EVALVAULT_CHAT_SIMPLE_MODE", "false").lower() == "true"
+    run_context_enabled = os.getenv("EVALVAULT_CHAT_RUN_CONTEXT_ENABLED", "true").lower() == "true"
+    if simple_mode:
+        yield _event({"type": "status", "message": "간단 채팅 처리 중..."})
+        answer = await _direct_chat_answer(user_text)
+        if answer:
+            async for item in _emit_answer(answer):
+                yield item
+        else:
+            yield _event({"type": "final", "content": "답변을 생성하지 못했습니다."})
+        return
     if category in {"result_interpretation", "improvement_direction"} and not run_id:
         yield _event(
             {
@@ -700,6 +977,7 @@ async def _chat_stream(
         _is_verb_only(user_text)
        and category in {"result_interpretation", "improvement_direction"}
        and run_id
+        and run_context_enabled
     ):
        yield _event({"type": "status", "message": "선택한 run 요약 중..."})
        try:
@@ -807,6 +1085,14 @@ async def _chat_stream(
         if tool_name == "get_artifacts" and not (tool_args.get("run_id") or run_id):
             yield _event({"type": "final", "content": "아티팩트 조회를 위해 run_id가 필요합니다."})
             return
+        if not run_context_enabled and tool_name in {"get_run_summary", "get_artifacts"}:
+            yield _event(
+                {
+                    "type": "final",
+                    "content": "run 요약/아티팩트 조회가 비활성화되어 있습니다.",
+                }
+            )
+            return
         if tool_name == "analyze_compare" and (
             not tool_args.get("run_id_a") or not tool_args.get("run_id_b")
         ):
evalvault/adapters/inbound/api/routers/domain.py
@@ -5,17 +5,22 @@ from __future__ import annotations
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 
-from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
+from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
 from evalvault.config.settings import get_settings
+from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort
 
 router = APIRouter()
-DEFAULT_MEMORY_DB_PATH = get_settings().evalvault_memory_db_path
+_settings = get_settings()
+DEFAULT_MEMORY_DB_PATH = (
+    _settings.evalvault_memory_db_path if _settings.db_backend == "sqlite" else None
+)
 
 
-# --- Dependencies ---
-def get_memory_adapter(db_path: str = DEFAULT_MEMORY_DB_PATH) -> SQLiteDomainMemoryAdapter:
+def get_memory_adapter(db_path: str | None = DEFAULT_MEMORY_DB_PATH) -> DomainMemoryPort:
     """Get memory adapter instance."""
-    return SQLiteDomainMemoryAdapter(db_path)
+    from pathlib import Path
+
+    return build_domain_memory_adapter(db_path=Path(db_path) if db_path else None)
 
 
 # --- Pydantic Models ---
evalvault/adapters/inbound/api/routers/pipeline.py
@@ -8,7 +8,7 @@ from fastapi.encoders import jsonable_encoder
 from pydantic import BaseModel
 
 from evalvault.adapters.outbound.llm import get_llm_adapter
-from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
+from evalvault.adapters.outbound.storage.factory import build_storage_adapter
 from evalvault.config.settings import get_settings
 from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
 from evalvault.domain.metrics.analysis_registry import list_analysis_metric_specs
@@ -264,9 +264,9 @@ def _intent_label(intent_value: str) -> str:
     return meta["label"] if meta else intent.value
 
 
-def _build_pipeline_service() -> tuple[AnalysisPipelineService, SQLiteStorageAdapter]:
+def _build_pipeline_service() -> tuple[AnalysisPipelineService, Any]:
     settings = get_settings()
-    storage = SQLiteStorageAdapter(db_path=settings.evalvault_db_path)
+    storage = build_storage_adapter(settings=settings)
     llm_adapter = None
     try:
         llm_adapter = get_llm_adapter(settings)
evalvault/adapters/inbound/api/routers/runs.py
@@ -21,7 +21,7 @@ from evalvault.adapters.outbound.dataset.templates import (
     render_dataset_template_xlsx,
 )
 from evalvault.adapters.outbound.debug.report_renderer import render_markdown
-from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
+from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
 from evalvault.adapters.outbound.report import DashboardGenerator
 from evalvault.config.settings import get_settings
 from evalvault.domain.entities import (
@@ -64,6 +64,7 @@ class RunSummaryResponse(BaseModel):
     phoenix_precision: float | None = None
     phoenix_drift: float | None = None
     phoenix_experiment_url: str | None = None
+    feedback_count: int | None = None
 
     model_config = {"from_attributes": True}
 
@@ -908,11 +909,20 @@ async def start_evaluation_endpoint(
         )
 
     try:
+        from pathlib import Path
+
         settings = get_settings()
-        memory_db = memory_config.get("db_path") or settings.evalvault_memory_db_path
+        if memory_config.get("db_path"):
+            memory_db = memory_config.get("db_path")
+        elif settings.db_backend == "sqlite":
+            memory_db = settings.evalvault_memory_db_path
+        else:
+            memory_db = None
         domain = memory_config.get("domain") or "default"
         language = memory_config.get("language") or "ko"
-        memory_adapter = SQLiteDomainMemoryAdapter(memory_db)
+        memory_adapter = build_domain_memory_adapter(
+            settings=settings, db_path=Path(memory_db) if memory_db else None
+        )
         hook = DomainLearningHook(memory_adapter)
         await hook.on_evaluation_complete(
             evaluation_run=result,
@@ -944,14 +954,22 @@ def list_runs(
 def list_runs(
     adapter: AdapterDep,
     limit: int = 50,
+    offset: int = Query(0, ge=0, description="Pagination offset"),
     dataset_name: str | None = Query(None, description="Filter by dataset name"),
     model_name: str | None = Query(None, description="Filter by model name"),
+    include_feedback: bool = Query(False, description="Include feedback count"),
 ) -> list[Any]:
     """List evaluation runs."""
     from evalvault.ports.inbound.web_port import RunFilters
 
     filters = RunFilters(dataset_name=dataset_name, model_name=model_name)
-    summaries = adapter.list_runs(limit=limit, filters=filters)
+    summaries = adapter.list_runs(limit=limit, offset=offset, filters=filters)
+    feedback_counts: dict[str, int] = {}
+    if include_feedback:
+        feedback_counts = {
+            summary.run_id: adapter.get_feedback_summary(summary.run_id).total_feedback
+            for summary in summaries
+        }
 
     # Convert RunSummary dataclass to dict/Pydantic compatible format
     # The adapter returns RunSummary objects which matches our response model mostly
@@ -975,6 +993,7 @@ def list_runs(
             "phoenix_precision": s.phoenix_precision,
             "phoenix_drift": s.phoenix_drift,
             "phoenix_experiment_url": s.phoenix_experiment_url,
+            "feedback_count": feedback_counts.get(s.run_id) if include_feedback else None,
         }
         for s in summaries
     ]
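
The list_runs changes above add offset and include_feedback query parameters plus a feedback_count field on RunSummaryResponse. A hypothetical client call, assuming the router is mounted under /api/runs (the mount path, host, and port are not part of this diff):

import httpx

# Only the parameter names (limit, offset, include_feedback) come from the diff;
# the base URL and route prefix are assumptions for illustration.
response = httpx.get(
    "http://localhost:8000/api/runs",
    params={"limit": 20, "offset": 20, "include_feedback": True},
)
response.raise_for_status()
for run in response.json():
    # feedback_count is only populated when include_feedback=true
    print(run.get("run_id"), run.get("feedback_count"))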