evalvault 1.62.1__py3-none-any.whl → 1.63.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. evalvault/adapters/inbound/api/adapter.py +190 -19
  2. evalvault/adapters/inbound/api/routers/runs.py +66 -2
  3. evalvault/adapters/inbound/cli/commands/method.py +5 -2
  4. evalvault/adapters/inbound/cli/commands/prompts.py +613 -5
  5. evalvault/adapters/inbound/cli/commands/run.py +88 -5
  6. evalvault/adapters/inbound/cli/commands/run_helpers.py +12 -0
  7. evalvault/adapters/inbound/mcp/tools.py +5 -2
  8. evalvault/adapters/outbound/analysis/ragas_evaluator_module.py +13 -9
  9. evalvault/adapters/outbound/improvement/pattern_detector.py +1 -1
  10. evalvault/adapters/outbound/improvement/playbook_loader.py +1 -1
  11. evalvault/adapters/outbound/llm/__init__.py +5 -43
  12. evalvault/adapters/outbound/llm/anthropic_adapter.py +27 -7
  13. evalvault/adapters/outbound/llm/factory.py +103 -0
  14. evalvault/adapters/outbound/llm/llm_relation_augmenter.py +39 -14
  15. evalvault/adapters/outbound/llm/ollama_adapter.py +34 -10
  16. evalvault/adapters/outbound/llm/openai_adapter.py +41 -8
  17. evalvault/adapters/outbound/llm/token_aware_chat.py +21 -2
  18. evalvault/adapters/outbound/llm/vllm_adapter.py +39 -8
  19. evalvault/adapters/outbound/nlp/korean/toolkit_factory.py +20 -0
  20. evalvault/adapters/outbound/report/llm_report_generator.py +90 -6
  21. evalvault/adapters/outbound/storage/base_sql.py +528 -21
  22. evalvault/adapters/outbound/storage/postgres_adapter.py +209 -0
  23. evalvault/adapters/outbound/storage/postgres_schema.sql +38 -0
  24. evalvault/adapters/outbound/storage/sqlite_adapter.py +86 -5
  25. evalvault/debug_ragas.py +7 -1
  26. evalvault/debug_ragas_real.py +5 -1
  27. evalvault/domain/entities/__init__.py +10 -0
  28. evalvault/domain/entities/prompt_suggestion.py +50 -0
  29. evalvault/domain/services/__init__.py +6 -0
  30. evalvault/domain/services/evaluator.py +191 -103
  31. evalvault/domain/services/holdout_splitter.py +67 -0
  32. evalvault/domain/services/intent_classifier.py +73 -0
  33. evalvault/domain/services/pipeline_template_registry.py +3 -0
  34. evalvault/domain/services/prompt_candidate_service.py +117 -0
  35. evalvault/domain/services/prompt_registry.py +40 -2
  36. evalvault/domain/services/prompt_scoring_service.py +286 -0
  37. evalvault/domain/services/prompt_suggestion_reporter.py +277 -0
  38. evalvault/domain/services/synthetic_qa_generator.py +4 -3
  39. evalvault/ports/inbound/learning_hook_port.py +4 -1
  40. evalvault/ports/outbound/__init__.py +2 -0
  41. evalvault/ports/outbound/llm_factory_port.py +13 -0
  42. evalvault/ports/outbound/llm_port.py +34 -2
  43. evalvault/ports/outbound/storage_port.py +38 -0
  44. {evalvault-1.62.1.dist-info → evalvault-1.63.1.dist-info}/METADATA +228 -4
  45. {evalvault-1.62.1.dist-info → evalvault-1.63.1.dist-info}/RECORD +48 -40
  46. {evalvault-1.62.1.dist-info → evalvault-1.63.1.dist-info}/WHEEL +0 -0
  47. {evalvault-1.62.1.dist-info → evalvault-1.63.1.dist-info}/entry_points.txt +0 -0
  48. {evalvault-1.62.1.dist-info → evalvault-1.63.1.dist-info}/licenses/LICENSE.md +0 -0
@@ -20,7 +20,8 @@ from evalvault.adapters.outbound.documents.versioned_loader import (
  load_versioned_chunks_from_pdf_dir,
  )
  from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
- from evalvault.adapters.outbound.llm import get_llm_adapter
+ from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
+ from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
  from evalvault.adapters.outbound.phoenix.sync_service import (
  PhoenixDatasetInfo,
  PhoenixSyncError,
@@ -39,6 +40,7 @@ from evalvault.domain.services.memory_based_analysis import MemoryBasedAnalysis
  from evalvault.domain.services.prompt_registry import (
  PromptInput,
  build_prompt_bundle,
+ build_prompt_inputs_from_snapshots,
  build_prompt_summary,
  )
  from evalvault.domain.services.ragas_prompt_overrides import (
@@ -211,7 +213,9 @@ def register_run_commands(
  None,
  "--output",
  "-o",
- help="Output file for results (JSON format).",
+ help=(
+ "Output file for results (JSON format). If .xlsx/.xls, exports Excel via DB save."
+ ),
  ),
  auto_analyze: bool = typer.Option(
  False,
@@ -808,6 +812,30 @@ def register_run_commands(
  if profile_name:
  settings = apply_profile(settings, profile_name)

+ if db_path is None:
+ db_path = Path(settings.evalvault_db_path)
+
+ excel_output: Path | None = None
+ if output and output.suffix.lower() in {".xlsx", ".xls"}:
+ excel_output = output
+ output = None
+ if db_path is None:
+ print_cli_error(
+ console,
+ "엑셀 출력은 DB 저장이 필요합니다.",
+ fixes=["--db <sqlite_path> 옵션을 함께 지정하세요."],
+ )
+ raise typer.Exit(1)
+ print_cli_warning(
+ console,
+ "엑셀 출력은 DB 저장이 필수이며, 지정한 경로로만 저장됩니다.",
+ tips=[
+ f"DB 저장 경로: {db_path}",
+ "기본 DB 엑셀은 생성하지 않습니다.",
+ "필요 시 --db로 경로를 변경하세요.",
+ ],
+ )
+
  # Override model if specified
  if model:
  if _is_oss_open_model(model) and settings.llm_provider != "vllm":
@@ -1436,7 +1464,9 @@ def register_run_commands(
  if should_enable_phoenix:
  ensure_phoenix_instrumentation(settings, console=console, force=True)

- evaluator = RagasEvaluator()
+ llm_factory = SettingsLLMFactory(settings)
+ korean_toolkit = try_create_korean_toolkit()
+ evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
  llm_adapter = None
  try:
  llm_adapter = get_llm_adapter(settings)
@@ -1707,6 +1737,40 @@ def register_run_commands(
  result.retrieval_metadata = merged_retriever_metadata

  result.tracker_metadata.setdefault("run_mode", preset.name)
+ tracker_meta = result.tracker_metadata or {}
+ result.tracker_metadata = tracker_meta
+ ragas_snapshots = tracker_meta.get("ragas_prompt_snapshots")
+ ragas_snapshot_inputs = build_prompt_inputs_from_snapshots(
+ ragas_snapshots if isinstance(ragas_snapshots, dict) else None,
+ )
+ override_status: dict[str, str] = {}
+ raw_override = tracker_meta.get("ragas_prompt_overrides")
+ if isinstance(raw_override, dict):
+ override_status = cast(dict[str, str], raw_override)
+ if override_status:
+ prompt_inputs = [
+ entry
+ for entry in prompt_inputs
+ if not (
+ entry.kind == "ragas"
+ and override_status.get(entry.role) is not None
+ and override_status.get(entry.role) != "applied"
+ )
+ ]
+
+ if ragas_snapshot_inputs:
+ existing_roles = {entry.role for entry in prompt_inputs if entry.kind == "ragas"}
+ for entry in ragas_snapshot_inputs:
+ if entry.role in existing_roles and override_status.get(entry.role) == "applied":
+ continue
+ prompt_inputs.append(entry)
+ if prompt_inputs and not db_path:
+ print_cli_warning(
+ console,
+ "Prompt snapshot은 --db 저장 시에만 DB에 기록됩니다.",
+ tips=["--db data/db/evalvault.db 옵션을 추가하세요."],
+ )
+
  if prompt_inputs:
  prompt_bundle = build_prompt_bundle(
  run_id=result.run_id,
@@ -1913,8 +1977,23 @@ def register_run_commands(
  console,
  storage_cls=SQLiteStorageAdapter,
  prompt_bundle=prompt_bundle,
+ export_excel=excel_output is None,
  )
  _log_duration(console, verbose, "DB 저장 완료", db_started_at)
+ if excel_output:
+ excel_started_at = datetime.now()
+ _log_timestamp(console, verbose, f"엑셀 저장 시작 ({excel_output})")
+ try:
+ storage = SQLiteStorageAdapter(db_path=db_path)
+ storage.export_run_to_excel(result.run_id, excel_output)
+ console.print(f"[green]Excel export saved: {excel_output}[/green]")
+ except Exception as exc:
+ print_cli_warning(
+ console,
+ "엑셀 내보내기에 실패했습니다.",
+ tips=[str(exc)],
+ )
+ _log_duration(console, verbose, "엑셀 저장 완료", excel_started_at)
  if output:
  output_started_at = datetime.now()
  _log_timestamp(console, verbose, f"결과 저장 시작 ({output})")
@@ -2019,7 +2098,9 @@ def register_run_commands(
  None,
  "--output",
  "-o",
- help="Output file for results (JSON format).",
+ help=(
+ "Output file for results (JSON format). If .xlsx/.xls, exports Excel via DB save."
+ ),
  ),
  auto_analyze: bool = typer.Option(
  False,
@@ -2303,7 +2384,9 @@ def register_run_commands(
  None,
  "--output",
  "-o",
- help="Output file for results (JSON format).",
+ help=(
+ "Output file for results (JSON format). If .xlsx/.xls, exports Excel via DB save."
+ ),
  ),
  auto_analyze: bool = typer.Option(
  False,
@@ -430,6 +430,7 @@ def _save_to_db(
  *,
  storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
  prompt_bundle: PromptSetBundle | None = None,
+ export_excel: bool = True,
  ) -> None:
  """Persist evaluation run (and optional prompt set) to SQLite database."""
  with console.status(f"[bold green]Saving to database {db_path}..."):
@@ -443,6 +444,17 @@ def _save_to_db(
  result.run_id,
  prompt_bundle.prompt_set.prompt_set_id,
  )
+ if export_excel:
+ excel_path = db_path.parent / f"evalvault_run_{result.run_id}.xlsx"
+ try:
+ storage.export_run_to_excel(result.run_id, excel_path)
+ console.print(f"[green]Excel export saved: {excel_path}[/green]")
+ except Exception as exc:
+ print_cli_warning(
+ console,
+ "엑셀 내보내기에 실패했습니다.",
+ tips=[str(exc)],
+ )
  console.print(f"[green]Results saved to database: {db_path}[/green]")
  console.print(f"[dim]Run ID: {result.run_id}[/dim]")
  if prompt_bundle:
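
A minimal sketch of driving the new export path directly, based on the export_run_to_excel call introduced above; the database path and run ID are placeholders, and the run is assumed to have been saved already:

from pathlib import Path

from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter

db_path = Path("data/db/evalvault.db")  # placeholder path
run_id = "run-123"                      # placeholder run ID

storage = SQLiteStorageAdapter(db_path=db_path)
# Mirrors the default behaviour in _save_to_db: write the workbook next to the DB file.
storage.export_run_to_excel(run_id, db_path.parent / f"evalvault_run_{run_id}.xlsx")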
@@ -18,7 +18,8 @@ from evalvault.adapters.inbound.cli.utils.analysis_io import (
  )
  from evalvault.adapters.outbound.analysis.pipeline_factory import build_analysis_pipeline_service
  from evalvault.adapters.outbound.analysis.statistical_adapter import StatisticalAnalysisAdapter
- from evalvault.adapters.outbound.llm import get_llm_adapter
+ from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
+ from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
  from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
  from evalvault.config.settings import Settings, apply_profile
  from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
@@ -175,7 +176,9 @@ def run_evaluation(payload: dict[str, Any] | RunEvaluationRequest) -> RunEvaluat
  )

  storage = SQLiteStorageAdapter(db_path=db_path)
- evaluator = RagasEvaluator()
+ llm_factory = SettingsLLMFactory(settings)
+ korean_toolkit = try_create_korean_toolkit()
+ evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
  adapter = WebUIAdapter(
  storage=storage,
  evaluator=evaluator,
@@ -2,7 +2,7 @@

  from __future__ import annotations

- from typing import Any
+ from typing import Any, cast

  from evalvault.adapters.outbound.analysis.base_module import BaseAnalysisModule
  from evalvault.adapters.outbound.analysis.pipeline_helpers import (
@@ -12,6 +12,9 @@ from evalvault.adapters.outbound.analysis.pipeline_helpers import (
  safe_mean,
  truncate_text,
  )
+ from evalvault.adapters.outbound.llm import SettingsLLMFactory
+ from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
+ from evalvault.config.settings import Settings
  from evalvault.domain.entities import Dataset, EvaluationRun, TestCase
  from evalvault.domain.services.evaluator import RagasEvaluator
  from evalvault.ports.outbound.llm_port import LLMPort
@@ -30,7 +33,10 @@ class RagasEvaluatorModule(BaseAnalysisModule):

  def __init__(self, llm_adapter: LLMPort | None = None) -> None:
  self._llm_adapter = llm_adapter
- self._evaluator = RagasEvaluator()
+ settings = Settings()
+ llm_factory = SettingsLLMFactory(settings)
+ korean_toolkit = try_create_korean_toolkit()
+ self._evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)

  def execute(
  self,
@@ -143,14 +149,12 @@
  *,
  recomputed: bool,
  ) -> dict[str, Any]:
- if metrics and isinstance(next(iter(metrics.values())), list):
- avg_scores = average_scores(metrics) # type: ignore[arg-type]
- sample_count = max(
- (len(values) for values in metrics.values()),
- default=0,
- ) # type: ignore[arg-type]
+ if metrics and all(isinstance(value, list) for value in metrics.values()):
+ metrics_lists = cast(dict[str, list[float]], metrics)
+ avg_scores = average_scores(metrics_lists)
+ sample_count = max((len(values) for values in metrics_lists.values()), default=0)
  else:
- avg_scores = metrics # type: ignore[assignment]
+ avg_scores = cast(dict[str, float], metrics)
  sample_count = len(per_case)

  overall = safe_mean(avg_scores.values()) if avg_scores else 0.0
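
The reworked branch above only averages when every metric value is a list; pre-aggregated or mixed inputs fall through to the float path. A small illustration of the two shapes this summary handles (values are made up):

# Per-sample scores: every value is a list, so average_scores() produces per-metric means
# and sample_count is taken from the longest list.
per_sample_metrics = {"faithfulness": [0.8, 0.6], "answer_relevancy": [0.9, 0.7]}

# Pre-aggregated scores: plain floats are used as-is and sample_count falls back to len(per_case).
aggregated_metrics = {"faithfulness": 0.7, "answer_relevancy": 0.8}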
@@ -11,7 +11,7 @@ import logging
  import re
  from collections.abc import Callable, Sequence
  from dataclasses import dataclass, field
- from typing import TYPE_CHECKING, Any, cast
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  from scipy import stats
@@ -9,7 +9,7 @@ import logging
  from collections.abc import Sequence
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import TYPE_CHECKING, Any
+ from typing import Any

  import yaml

@@ -8,6 +8,10 @@ from evalvault.adapters.outbound.llm.base import (
  LLMConfigurationError,
  create_openai_embeddings_with_legacy,
  )
+ from evalvault.adapters.outbound.llm.factory import (
+ SettingsLLMFactory,
+ create_llm_adapter_for_model,
+ )
  from evalvault.adapters.outbound.llm.llm_relation_augmenter import LLMRelationAugmenter
  from evalvault.config.settings import Settings
  from evalvault.ports.outbound.llm_port import LLMPort
@@ -70,49 +74,6 @@ def get_llm_adapter(settings: Settings) -> LLMPort:
  )


- def create_llm_adapter_for_model(
- provider: str,
- model_name: str,
- base_settings: Settings,
- ) -> LLMPort:
- provider = provider.lower()
-
- if provider == "openai":
- base_settings.llm_provider = "openai"
- base_settings.openai_model = model_name
- from evalvault.adapters.outbound.llm.openai_adapter import OpenAIAdapter
-
- return OpenAIAdapter(base_settings)
- if provider == "ollama":
- base_settings.llm_provider = "ollama"
- base_settings.ollama_model = model_name
- from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
-
- return OllamaAdapter(base_settings)
- if provider == "vllm":
- base_settings.llm_provider = "vllm"
- base_settings.vllm_model = model_name
- from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
-
- return VLLMAdapter(base_settings)
- if provider == "azure":
- base_settings.llm_provider = "azure"
- base_settings.azure_deployment = model_name
- from evalvault.adapters.outbound.llm.azure_adapter import AzureOpenAIAdapter
-
- return AzureOpenAIAdapter(base_settings)
- if provider == "anthropic":
- base_settings.llm_provider = "anthropic"
- base_settings.anthropic_model = model_name
- from evalvault.adapters.outbound.llm.anthropic_adapter import AnthropicAdapter
-
- return AnthropicAdapter(base_settings)
-
- raise ValueError(
- f"Unsupported LLM provider: '{provider}'. Supported: openai, ollama, vllm, azure, anthropic"
- )
-
-
  __all__ = [
  "BaseLLMAdapter",
  "LLMConfigurationError",
@@ -123,6 +84,7 @@ __all__ = [
  "LLMRelationAugmenter",
  "OllamaAdapter",
  "VLLMAdapter",
+ "SettingsLLMFactory",
  "get_llm_adapter",
  "create_llm_adapter_for_model",
  ]
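
Since the factory now lives in its own module but stays re-exported from the package __init__, both import paths below should resolve to the same objects (a sketch, not part of the diff):

from evalvault.adapters.outbound.llm import SettingsLLMFactory, create_llm_adapter_for_model
# ...or import straight from the new module:
# from evalvault.adapters.outbound.llm.factory import SettingsLLMFactory, create_llm_adapter_for_model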
@@ -14,7 +14,7 @@ from evalvault.adapters.outbound.llm.base import (
  from evalvault.adapters.outbound.llm.instructor_factory import create_instructor_llm
  from evalvault.config.phoenix_support import instrumentation_span, set_span_attributes
  from evalvault.config.settings import Settings
- from evalvault.ports.outbound.llm_port import ThinkingConfig
+ from evalvault.ports.outbound.llm_port import GenerationOptions, ThinkingConfig

  try: # Optional dependency
  from anthropic import AsyncAnthropic
@@ -147,7 +147,12 @@ class AnthropicAdapter(BaseLLMAdapter):
  """Get the extended thinking token budget."""
  return self._thinking_budget

- async def agenerate_text(self, prompt: str) -> str:
+ async def agenerate_text(
+ self,
+ prompt: str,
+ *,
+ options: GenerationOptions | None = None,
+ ) -> str:
  """Generate text from a prompt (async).

  Uses the Anthropic messages API for simple text generation.
@@ -158,10 +163,17 @@ class AnthropicAdapter(BaseLLMAdapter):
  Returns:
  Generated text string
  """
+ max_tokens = options.max_tokens if options and options.max_tokens is not None else 8192
+ api_kwargs: dict[str, Any] = {}
+ if options and options.temperature is not None:
+ api_kwargs["temperature"] = options.temperature
+ if options and options.top_p is not None:
+ api_kwargs["top_p"] = options.top_p
  response = await self._anthropic_client.messages.create(
  model=self._model_name,
- max_tokens=8192,
+ max_tokens=max_tokens,
  messages=[{"role": "user", "content": prompt}],
+ **api_kwargs,
  )
  # Extract text from response content blocks
  text_parts = []
@@ -170,7 +182,13 @@ class AnthropicAdapter(BaseLLMAdapter):
  text_parts.append(block.text)
  return "".join(text_parts)

- def generate_text(self, prompt: str, *, json_mode: bool = False) -> str:
+ def generate_text(
+ self,
+ prompt: str,
+ *,
+ json_mode: bool = False,
+ options: GenerationOptions | None = None,
+ ) -> str:
  """Generate text from a prompt (sync).

  Args:
@@ -192,12 +210,14 @@ class AnthropicAdapter(BaseLLMAdapter):
  import nest_asyncio

  nest_asyncio.apply()
- return loop.run_until_complete(self.agenerate_text(prompt))
+ return loop.run_until_complete(self.agenerate_text(prompt, options=options))
  except ImportError:
  import concurrent.futures

  with concurrent.futures.ThreadPoolExecutor() as executor:
- future = executor.submit(asyncio.run, self.agenerate_text(prompt))
+ future = executor.submit(
+ asyncio.run, self.agenerate_text(prompt, options=options)
+ )
  return future.result()
  else:
- return asyncio.run(self.agenerate_text(prompt))
+ return asyncio.run(self.agenerate_text(prompt, options=options))
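
The adapters now thread an optional options object through generate_text/agenerate_text. Assuming GenerationOptions (from llm_port, extended in this release) is a small options container with keyword fields such as max_tokens, temperature and top_p, a call could look like the sketch below; only non-None fields are forwarded to the provider API:

from evalvault.ports.outbound.llm_port import GenerationOptions

# Hypothetical values; `adapter` stands for any adapter instance, e.g. AnthropicAdapter(settings).
opts = GenerationOptions(max_tokens=1024, temperature=0.2, top_p=0.9)
text = adapter.generate_text("Summarize the policy terms.", options=opts)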
@@ -0,0 +1,103 @@
+ from __future__ import annotations
+
+ from evalvault.config.settings import Settings
+ from evalvault.ports.outbound.llm_factory_port import LLMFactoryPort
+ from evalvault.ports.outbound.llm_port import LLMPort
+
+
+ class SettingsLLMFactory(LLMFactoryPort):
+ def __init__(self, settings: Settings) -> None:
+ self._settings = settings
+
+ def create_faithfulness_fallback(
+ self,
+ active_provider: str | None,
+ active_model: str | None,
+ ) -> LLMPort | None:
+ provider, model = _resolve_faithfulness_fallback_config(
+ settings=self._settings,
+ active_provider=active_provider,
+ active_model=active_model,
+ )
+ if not provider or not model:
+ return None
+ return create_llm_adapter_for_model(provider, model, self._settings)
+
+
+ def create_llm_adapter_for_model(
+ provider: str,
+ model_name: str,
+ base_settings: Settings,
+ ) -> LLMPort:
+ provider = provider.lower()
+
+ if provider == "openai":
+ base_settings.llm_provider = "openai"
+ base_settings.openai_model = model_name
+ from evalvault.adapters.outbound.llm.openai_adapter import OpenAIAdapter
+
+ return OpenAIAdapter(base_settings)
+ if provider == "ollama":
+ base_settings.llm_provider = "ollama"
+ base_settings.ollama_model = model_name
+ from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
+
+ return OllamaAdapter(base_settings)
+ if provider == "vllm":
+ base_settings.llm_provider = "vllm"
+ base_settings.vllm_model = model_name
+ from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
+
+ return VLLMAdapter(base_settings)
+ if provider == "azure":
+ base_settings.llm_provider = "azure"
+ base_settings.azure_deployment = model_name
+ from evalvault.adapters.outbound.llm.azure_adapter import AzureOpenAIAdapter
+
+ return AzureOpenAIAdapter(base_settings)
+ if provider == "anthropic":
+ base_settings.llm_provider = "anthropic"
+ base_settings.anthropic_model = model_name
+ from evalvault.adapters.outbound.llm.anthropic_adapter import AnthropicAdapter
+
+ return AnthropicAdapter(base_settings)
+
+ raise ValueError(
+ f"Unsupported LLM provider: '{provider}'. Supported: openai, ollama, vllm, azure, anthropic"
+ )
+
+
+ def _resolve_faithfulness_fallback_config(
+ *,
+ settings: Settings,
+ active_provider: str | None,
+ active_model: str | None,
+ ) -> tuple[str | None, str | None]:
+ provider = (
+ settings.faithfulness_fallback_provider.strip().lower()
+ if settings.faithfulness_fallback_provider
+ else None
+ )
+ model = settings.faithfulness_fallback_model
+ normalized_active = active_provider.strip().lower() if active_provider else None
+ default_provider = normalized_active or settings.llm_provider.lower()
+
+ if not provider and model:
+ provider = default_provider
+ if provider and not model:
+ model = _default_faithfulness_fallback_model(provider)
+ if not provider and not model:
+ provider = default_provider
+ model = _default_faithfulness_fallback_model(default_provider)
+
+ if not provider or not model:
+ return None, None
+ return provider, model
+
+
+ def _default_faithfulness_fallback_model(provider: str) -> str | None:
+ if provider == "ollama":
+ return "gpt-oss-safeguard:20b"
+ if provider == "vllm":
+ return "gpt-oss-120b"
+ return None
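
A usage sketch for the relocated factory code above; Settings() values come from the environment or profile, and note that create_llm_adapter_for_model mutates the settings object it receives:

from evalvault.adapters.outbound.llm.factory import SettingsLLMFactory, create_llm_adapter_for_model
from evalvault.config.settings import Settings

settings = Settings()

# Build an adapter for an explicit provider/model pair (model name taken from the defaults above).
adapter = create_llm_adapter_for_model("ollama", "gpt-oss-safeguard:20b", settings)

# Resolve the faithfulness fallback adapter; returns None when no provider/model can be resolved.
fallback = SettingsLLMFactory(settings).create_faithfulness_fallback(
    active_provider=settings.llm_provider,
    active_model=None,
)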
@@ -9,6 +9,29 @@ from evalvault.domain.services.entity_extractor import Entity, Relation
  from evalvault.ports.outbound.llm_port import LLMPort
  from evalvault.ports.outbound.relation_augmenter_port import RelationAugmenterPort

+ _RELATION_SYSTEM_PROMPT_KO = (
+ "당신은 한국어 보험 문서의 지식 그래프 감사자입니다. "
+ "제공된 문서 스니펫을 보고 관계를 확인하거나 수정하세요."
+ )
+ _RELATION_SYSTEM_PROMPT_EN = (
+ "You are a knowledge graph auditor for Korean insurance documents. "
+ "Review the provided document snippet and confirm or fix the relations."
+ )
+ _RELATION_PROMPT_TEMPLATE_KO = (
+ "{system_prompt}\n"
+ "source, target, relation_type, confidence, justification 키를 포함한 JSON 배열만 반환하세요.\n\n"
+ "문서:\n{document_text}\n\n"
+ "엔티티:\n{entity_lines}\n\n"
+ "낮은 신뢰도 관계:\n{relation_lines}"
+ )
+ _RELATION_PROMPT_TEMPLATE_EN = (
+ "{system_prompt}\n"
+ "Return a JSON array of objects with keys source, target, relation_type, confidence, justification.\n\n"
+ "Document:\n{document_text}\n\n"
+ "Entities:\n{entity_lines}\n\n"
+ "Low-confidence relations:\n{relation_lines}"
+ )
+

  class LLMRelationAugmenter(RelationAugmenterPort):
  """LLM을 사용해 저신뢰 관계를 검증/보강."""
@@ -18,13 +41,17 @@ class LLMRelationAugmenter(RelationAugmenterPort):
  llm_port: LLMPort,
  max_relations: int = 5,
  system_prompt: str | None = None,
+ language: str = "ko",
  ):
  self._llm_port = llm_port
  self._max_relations = max_relations
- self._system_prompt = system_prompt or (
- "You are a knowledge graph auditor for Korean insurance documents. "
- "Review the provided document snippet and confirm or fix the relations."
- )
+ self._language = language
+ if system_prompt:
+ self._system_prompt = system_prompt
+ else:
+ self._system_prompt = (
+ _RELATION_SYSTEM_PROMPT_EN if language == "en" else _RELATION_SYSTEM_PROMPT_KO
+ )

  def augment_relations(
  self,
@@ -75,16 +102,14 @@ class LLMRelationAugmenter(RelationAugmenterPort):
  f"- {rel.source} -> {rel.target} [{rel.relation_type}] conf={rel.confidence:.2f}"
  for rel in relations
  ]
- return (
- f"{self._system_prompt}\n"
- "Return a JSON array of objects with keys "
- "source, target, relation_type, confidence, justification.\n\n"
- "Document:\n"
- f"{document_text}\n\n"
- "Entities:\n"
- f"{chr(10).join(entity_lines)}\n\n"
- "Low-confidence relations:\n"
- f"{chr(10).join(relation_lines)}"
+ template = (
+ _RELATION_PROMPT_TEMPLATE_EN if self._language == "en" else _RELATION_PROMPT_TEMPLATE_KO
+ )
+ return template.format(
+ system_prompt=self._system_prompt,
+ document_text=document_text,
+ entity_lines=chr(10).join(entity_lines),
+ relation_lines=chr(10).join(relation_lines),
  )

  @staticmethod
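
With the new language flag, the constructor picks the Korean prompt constants by default and the English ones for language="en", unless an explicit system_prompt overrides the choice. A short sketch, where llm_adapter stands for any LLMPort implementation:

augmenter_ko = LLMRelationAugmenter(llm_port=llm_adapter, max_relations=5)  # Korean prompts (default)
augmenter_en = LLMRelationAugmenter(llm_port=llm_adapter, language="en")    # English prompts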
@@ -19,7 +19,7 @@ from evalvault.adapters.outbound.llm.base import BaseLLMAdapter
  from evalvault.adapters.outbound.llm.instructor_factory import create_instructor_llm
  from evalvault.adapters.outbound.llm.token_aware_chat import ThinkingTokenTrackingAsyncOpenAI
  from evalvault.config.settings import Settings
- from evalvault.ports.outbound.llm_port import ThinkingConfig
+ from evalvault.ports.outbound.llm_port import GenerationOptions, ThinkingConfig


  class OllamaAdapter(BaseLLMAdapter):
@@ -240,7 +240,12 @@ class OllamaAdapter(BaseLLMAdapter):
  else:
  return asyncio.run(self.embed(texts, model, dimension))

- async def agenerate_text(self, prompt: str) -> str:
+ async def agenerate_text(
+ self,
+ prompt: str,
+ *,
+ options: GenerationOptions | None = None,
+ ) -> str:
  """Generate text from a prompt (async).

  Uses the Ollama OpenAI-compatible API for simple text generation.
@@ -251,13 +256,30 @@ class OllamaAdapter(BaseLLMAdapter):
  Returns:
  Generated text string
  """
- response = await self._embedding_client.chat.completions.create(
- model=self._ollama_model,
- messages=[{"role": "user", "content": prompt}],
- )
+ api_kwargs: dict[str, Any] = {
+ "model": self._ollama_model,
+ "messages": [{"role": "user", "content": prompt}],
+ }
+ if options and options.max_tokens is not None:
+ api_kwargs["max_completion_tokens"] = options.max_tokens
+ if options and options.temperature is not None:
+ api_kwargs["temperature"] = options.temperature
+ if options and options.top_p is not None:
+ api_kwargs["top_p"] = options.top_p
+ if options and options.n is not None:
+ api_kwargs["n"] = options.n
+ if options and options.seed is not None:
+ api_kwargs["seed"] = options.seed
+ response = await self._embedding_client.chat.completions.create(**api_kwargs)
  return response.choices[0].message.content or ""

- def generate_text(self, prompt: str, *, json_mode: bool = False) -> str:
+ def generate_text(
+ self,
+ prompt: str,
+ *,
+ json_mode: bool = False,
+ options: GenerationOptions | None = None,
+ ) -> str:
  """Generate text from a prompt (sync).

  Args:
@@ -279,12 +301,14 @@ class OllamaAdapter(BaseLLMAdapter):
  import nest_asyncio

  nest_asyncio.apply()
- return loop.run_until_complete(self.agenerate_text(prompt))
+ return loop.run_until_complete(self.agenerate_text(prompt, options=options))
  except ImportError:
  import concurrent.futures

  with concurrent.futures.ThreadPoolExecutor() as executor:
- future = executor.submit(asyncio.run, self.agenerate_text(prompt))
+ future = executor.submit(
+ asyncio.run, self.agenerate_text(prompt, options=options)
+ )
  return future.result()
  else:
- return asyncio.run(self.agenerate_text(prompt))
+ return asyncio.run(self.agenerate_text(prompt, options=options))