evalvault 1.62.1__py3-none-any.whl → 1.63.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. evalvault/adapters/inbound/api/adapter.py +190 -19
  2. evalvault/adapters/inbound/api/routers/runs.py +66 -2
  3. evalvault/adapters/inbound/cli/commands/method.py +5 -2
  4. evalvault/adapters/inbound/cli/commands/prompts.py +613 -5
  5. evalvault/adapters/inbound/cli/commands/run.py +43 -2
  6. evalvault/adapters/inbound/cli/commands/run_helpers.py +10 -0
  7. evalvault/adapters/inbound/mcp/tools.py +5 -2
  8. evalvault/adapters/outbound/analysis/ragas_evaluator_module.py +13 -9
  9. evalvault/adapters/outbound/llm/__init__.py +5 -43
  10. evalvault/adapters/outbound/llm/anthropic_adapter.py +27 -7
  11. evalvault/adapters/outbound/llm/factory.py +103 -0
  12. evalvault/adapters/outbound/llm/llm_relation_augmenter.py +39 -14
  13. evalvault/adapters/outbound/llm/ollama_adapter.py +34 -10
  14. evalvault/adapters/outbound/llm/openai_adapter.py +41 -8
  15. evalvault/adapters/outbound/llm/token_aware_chat.py +21 -2
  16. evalvault/adapters/outbound/llm/vllm_adapter.py +39 -8
  17. evalvault/adapters/outbound/nlp/korean/toolkit_factory.py +20 -0
  18. evalvault/adapters/outbound/report/llm_report_generator.py +90 -6
  19. evalvault/adapters/outbound/storage/base_sql.py +527 -21
  20. evalvault/adapters/outbound/storage/postgres_adapter.py +209 -0
  21. evalvault/adapters/outbound/storage/postgres_schema.sql +38 -0
  22. evalvault/adapters/outbound/storage/sqlite_adapter.py +86 -5
  23. evalvault/debug_ragas.py +7 -1
  24. evalvault/debug_ragas_real.py +5 -1
  25. evalvault/domain/entities/__init__.py +10 -0
  26. evalvault/domain/entities/prompt_suggestion.py +50 -0
  27. evalvault/domain/services/__init__.py +6 -0
  28. evalvault/domain/services/evaluator.py +191 -103
  29. evalvault/domain/services/holdout_splitter.py +67 -0
  30. evalvault/domain/services/intent_classifier.py +73 -0
  31. evalvault/domain/services/pipeline_template_registry.py +3 -0
  32. evalvault/domain/services/prompt_candidate_service.py +117 -0
  33. evalvault/domain/services/prompt_registry.py +40 -2
  34. evalvault/domain/services/prompt_scoring_service.py +286 -0
  35. evalvault/domain/services/prompt_suggestion_reporter.py +277 -0
  36. evalvault/domain/services/synthetic_qa_generator.py +4 -3
  37. evalvault/ports/inbound/learning_hook_port.py +4 -1
  38. evalvault/ports/outbound/__init__.py +2 -0
  39. evalvault/ports/outbound/llm_factory_port.py +13 -0
  40. evalvault/ports/outbound/llm_port.py +34 -2
  41. evalvault/ports/outbound/storage_port.py +38 -0
  42. {evalvault-1.62.1.dist-info → evalvault-1.63.0.dist-info}/METADATA +228 -4
  43. {evalvault-1.62.1.dist-info → evalvault-1.63.0.dist-info}/RECORD +46 -38
  44. {evalvault-1.62.1.dist-info → evalvault-1.63.0.dist-info}/WHEEL +0 -0
  45. {evalvault-1.62.1.dist-info → evalvault-1.63.0.dist-info}/entry_points.txt +0 -0
  46. {evalvault-1.62.1.dist-info → evalvault-1.63.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -20,7 +20,8 @@ from evalvault.adapters.outbound.documents.versioned_loader import (
     load_versioned_chunks_from_pdf_dir,
 )
 from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
-from evalvault.adapters.outbound.llm import get_llm_adapter
+from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
+from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
 from evalvault.adapters.outbound.phoenix.sync_service import (
     PhoenixDatasetInfo,
     PhoenixSyncError,
@@ -39,6 +40,7 @@ from evalvault.domain.services.memory_based_analysis import MemoryBasedAnalysis
 from evalvault.domain.services.prompt_registry import (
     PromptInput,
     build_prompt_bundle,
+    build_prompt_inputs_from_snapshots,
     build_prompt_summary,
 )
 from evalvault.domain.services.ragas_prompt_overrides import (
@@ -808,6 +810,9 @@ def register_run_commands(
         if profile_name:
             settings = apply_profile(settings, profile_name)

+        if db_path is None:
+            db_path = Path(settings.evalvault_db_path)
+
         # Override model if specified
         if model:
             if _is_oss_open_model(model) and settings.llm_provider != "vllm":
@@ -1436,7 +1441,9 @@ def register_run_commands(
         if should_enable_phoenix:
             ensure_phoenix_instrumentation(settings, console=console, force=True)

-        evaluator = RagasEvaluator()
+        llm_factory = SettingsLLMFactory(settings)
+        korean_toolkit = try_create_korean_toolkit()
+        evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
         llm_adapter = None
         try:
             llm_adapter = get_llm_adapter(settings)
@@ -1707,6 +1714,40 @@ def register_run_commands(
         result.retrieval_metadata = merged_retriever_metadata

         result.tracker_metadata.setdefault("run_mode", preset.name)
+        tracker_meta = result.tracker_metadata or {}
+        result.tracker_metadata = tracker_meta
+        ragas_snapshots = tracker_meta.get("ragas_prompt_snapshots")
+        ragas_snapshot_inputs = build_prompt_inputs_from_snapshots(
+            ragas_snapshots if isinstance(ragas_snapshots, dict) else None,
+        )
+        override_status: dict[str, str] = {}
+        raw_override = tracker_meta.get("ragas_prompt_overrides")
+        if isinstance(raw_override, dict):
+            override_status = cast(dict[str, str], raw_override)
+        if override_status:
+            prompt_inputs = [
+                entry
+                for entry in prompt_inputs
+                if not (
+                    entry.kind == "ragas"
+                    and override_status.get(entry.role) is not None
+                    and override_status.get(entry.role) != "applied"
+                )
+            ]
+
+        if ragas_snapshot_inputs:
+            existing_roles = {entry.role for entry in prompt_inputs if entry.kind == "ragas"}
+            for entry in ragas_snapshot_inputs:
+                if entry.role in existing_roles and override_status.get(entry.role) == "applied":
+                    continue
+                prompt_inputs.append(entry)
+        if prompt_inputs and not db_path:
+            print_cli_warning(
+                console,
+                "Prompt snapshot은 --db 저장 시에만 DB에 기록됩니다.",
+                tips=["--db data/db/evalvault.db 옵션을 추가하세요."],
+            )
+
         if prompt_inputs:
             prompt_bundle = build_prompt_bundle(
                 run_id=result.run_id,
@@ -443,6 +443,16 @@ def _save_to_db(
         result.run_id,
         prompt_bundle.prompt_set.prompt_set_id,
     )
+    excel_path = db_path.parent / f"evalvault_run_{result.run_id}.xlsx"
+    try:
+        storage.export_run_to_excel(result.run_id, excel_path)
+        console.print(f"[green]Excel export saved: {excel_path}[/green]")
+    except Exception as exc:
+        print_cli_warning(
+            console,
+            "엑셀 내보내기에 실패했습니다.",
+            tips=[str(exc)],
+        )
     console.print(f"[green]Results saved to database: {db_path}[/green]")
     console.print(f"[dim]Run ID: {result.run_id}[/dim]")
     if prompt_bundle:
@@ -18,7 +18,8 @@ from evalvault.adapters.inbound.cli.utils.analysis_io import (
 )
 from evalvault.adapters.outbound.analysis.pipeline_factory import build_analysis_pipeline_service
 from evalvault.adapters.outbound.analysis.statistical_adapter import StatisticalAnalysisAdapter
-from evalvault.adapters.outbound.llm import get_llm_adapter
+from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
+from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
 from evalvault.adapters.outbound.storage.sqlite_adapter import SQLiteStorageAdapter
 from evalvault.config.settings import Settings, apply_profile
 from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
@@ -175,7 +176,9 @@ def run_evaluation(payload: dict[str, Any] | RunEvaluationRequest) -> RunEvaluat
     )

     storage = SQLiteStorageAdapter(db_path=db_path)
-    evaluator = RagasEvaluator()
+    llm_factory = SettingsLLMFactory(settings)
+    korean_toolkit = try_create_korean_toolkit()
+    evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
     adapter = WebUIAdapter(
         storage=storage,
         evaluator=evaluator,
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any
+from typing import Any, cast

 from evalvault.adapters.outbound.analysis.base_module import BaseAnalysisModule
 from evalvault.adapters.outbound.analysis.pipeline_helpers import (
@@ -12,6 +12,9 @@ from evalvault.adapters.outbound.analysis.pipeline_helpers import (
     safe_mean,
     truncate_text,
 )
+from evalvault.adapters.outbound.llm import SettingsLLMFactory
+from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
+from evalvault.config.settings import Settings
 from evalvault.domain.entities import Dataset, EvaluationRun, TestCase
 from evalvault.domain.services.evaluator import RagasEvaluator
 from evalvault.ports.outbound.llm_port import LLMPort
@@ -30,7 +33,10 @@ class RagasEvaluatorModule(BaseAnalysisModule):

     def __init__(self, llm_adapter: LLMPort | None = None) -> None:
         self._llm_adapter = llm_adapter
-        self._evaluator = RagasEvaluator()
+        settings = Settings()
+        llm_factory = SettingsLLMFactory(settings)
+        korean_toolkit = try_create_korean_toolkit()
+        self._evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)

     def execute(
         self,
@@ -143,14 +149,12 @@ class RagasEvaluatorModule(BaseAnalysisModule):
         *,
         recomputed: bool,
     ) -> dict[str, Any]:
-        if metrics and isinstance(next(iter(metrics.values())), list):
-            avg_scores = average_scores(metrics)  # type: ignore[arg-type]
-            sample_count = max(
-                (len(values) for values in metrics.values()),
-                default=0,
-            )  # type: ignore[arg-type]
+        if metrics and all(isinstance(value, list) for value in metrics.values()):
+            metrics_lists = cast(dict[str, list[float]], metrics)
+            avg_scores = average_scores(metrics_lists)
+            sample_count = max((len(values) for values in metrics_lists.values()), default=0)
         else:
-            avg_scores = metrics  # type: ignore[assignment]
+            avg_scores = cast(dict[str, float], metrics)
             sample_count = len(per_case)

         overall = safe_mean(avg_scores.values()) if avg_scores else 0.0
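
The rewritten branch above now checks that every metrics value is a list (and narrows with `cast`) instead of peeking at the first value, so per-case payloads (`dict[str, list[float]]`) and pre-averaged payloads (`dict[str, float]`) are handled explicitly. A minimal sketch of that normalization, using an illustrative stand-in for `average_scores` (the real helper lives in `pipeline_helpers` and may differ):

```python
# Illustrative only: normalize either metrics shape the way the hunk above does.
from statistics import mean


def summarize(metrics: dict) -> tuple[dict[str, float], int]:
    """Return (average scores, sample count) for per-case or pre-averaged metrics."""
    if metrics and all(isinstance(value, list) for value in metrics.values()):
        avg = {name: mean(values) for name, values in metrics.items()}  # assumed average_scores behaviour
        count = max((len(values) for values in metrics.values()), default=0)
        return avg, count
    return dict(metrics), 0  # pre-averaged payload; the caller supplies the case count


print(summarize({"faithfulness": [0.8, 0.9], "answer_relevancy": [0.7, 0.75]}))
# -> ({'faithfulness': ~0.85, 'answer_relevancy': 0.725}, 2)
```
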
@@ -8,6 +8,10 @@ from evalvault.adapters.outbound.llm.base import (
     LLMConfigurationError,
     create_openai_embeddings_with_legacy,
 )
+from evalvault.adapters.outbound.llm.factory import (
+    SettingsLLMFactory,
+    create_llm_adapter_for_model,
+)
 from evalvault.adapters.outbound.llm.llm_relation_augmenter import LLMRelationAugmenter
 from evalvault.config.settings import Settings
 from evalvault.ports.outbound.llm_port import LLMPort
@@ -70,49 +74,6 @@ def get_llm_adapter(settings: Settings) -> LLMPort:
     )


-def create_llm_adapter_for_model(
-    provider: str,
-    model_name: str,
-    base_settings: Settings,
-) -> LLMPort:
-    provider = provider.lower()
-
-    if provider == "openai":
-        base_settings.llm_provider = "openai"
-        base_settings.openai_model = model_name
-        from evalvault.adapters.outbound.llm.openai_adapter import OpenAIAdapter
-
-        return OpenAIAdapter(base_settings)
-    if provider == "ollama":
-        base_settings.llm_provider = "ollama"
-        base_settings.ollama_model = model_name
-        from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
-
-        return OllamaAdapter(base_settings)
-    if provider == "vllm":
-        base_settings.llm_provider = "vllm"
-        base_settings.vllm_model = model_name
-        from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
-
-        return VLLMAdapter(base_settings)
-    if provider == "azure":
-        base_settings.llm_provider = "azure"
-        base_settings.azure_deployment = model_name
-        from evalvault.adapters.outbound.llm.azure_adapter import AzureOpenAIAdapter
-
-        return AzureOpenAIAdapter(base_settings)
-    if provider == "anthropic":
-        base_settings.llm_provider = "anthropic"
-        base_settings.anthropic_model = model_name
-        from evalvault.adapters.outbound.llm.anthropic_adapter import AnthropicAdapter
-
-        return AnthropicAdapter(base_settings)
-
-    raise ValueError(
-        f"Unsupported LLM provider: '{provider}'. Supported: openai, ollama, vllm, azure, anthropic"
-    )
-
-
 __all__ = [
     "BaseLLMAdapter",
     "LLMConfigurationError",
@@ -123,6 +84,7 @@ __all__ = [
     "LLMRelationAugmenter",
     "OllamaAdapter",
     "VLLMAdapter",
+    "SettingsLLMFactory",
     "get_llm_adapter",
     "create_llm_adapter_for_model",
 ]
@@ -14,7 +14,7 @@ from evalvault.adapters.outbound.llm.base import (
 from evalvault.adapters.outbound.llm.instructor_factory import create_instructor_llm
 from evalvault.config.phoenix_support import instrumentation_span, set_span_attributes
 from evalvault.config.settings import Settings
-from evalvault.ports.outbound.llm_port import ThinkingConfig
+from evalvault.ports.outbound.llm_port import GenerationOptions, ThinkingConfig

 try:  # Optional dependency
     from anthropic import AsyncAnthropic
@@ -147,7 +147,12 @@ class AnthropicAdapter(BaseLLMAdapter):
         """Get the extended thinking token budget."""
         return self._thinking_budget

-    async def agenerate_text(self, prompt: str) -> str:
+    async def agenerate_text(
+        self,
+        prompt: str,
+        *,
+        options: GenerationOptions | None = None,
+    ) -> str:
         """Generate text from a prompt (async).

         Uses the Anthropic messages API for simple text generation.
@@ -158,10 +163,17 @@ class AnthropicAdapter(BaseLLMAdapter):
         Returns:
             Generated text string
         """
+        max_tokens = options.max_tokens if options and options.max_tokens is not None else 8192
+        api_kwargs: dict[str, Any] = {}
+        if options and options.temperature is not None:
+            api_kwargs["temperature"] = options.temperature
+        if options and options.top_p is not None:
+            api_kwargs["top_p"] = options.top_p
         response = await self._anthropic_client.messages.create(
             model=self._model_name,
-            max_tokens=8192,
+            max_tokens=max_tokens,
             messages=[{"role": "user", "content": prompt}],
+            **api_kwargs,
         )
         # Extract text from response content blocks
         text_parts = []
@@ -170,7 +182,13 @@
                 text_parts.append(block.text)
         return "".join(text_parts)

-    def generate_text(self, prompt: str, *, json_mode: bool = False) -> str:
+    def generate_text(
+        self,
+        prompt: str,
+        *,
+        json_mode: bool = False,
+        options: GenerationOptions | None = None,
+    ) -> str:
         """Generate text from a prompt (sync).

         Args:
@@ -192,12 +210,14 @@
                 import nest_asyncio

                 nest_asyncio.apply()
-                return loop.run_until_complete(self.agenerate_text(prompt))
+                return loop.run_until_complete(self.agenerate_text(prompt, options=options))
             except ImportError:
                 import concurrent.futures

                 with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(asyncio.run, self.agenerate_text(prompt))
+                    future = executor.submit(
+                        asyncio.run, self.agenerate_text(prompt, options=options)
+                    )
                     return future.result()
         else:
-            return asyncio.run(self.agenerate_text(prompt))
+            return asyncio.run(self.agenerate_text(prompt, options=options))
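
Both the Anthropic and Ollama adapters route the synchronous `generate_text` through `agenerate_text` with the same event-loop handling: use `asyncio.run` when no loop is active, otherwise fall back to `nest_asyncio` or a worker thread. A standalone sketch of that dispatch pattern, simplified from the hunks above:

```python
# Illustrative sketch of the sync-over-async dispatch used by the adapters above.
import asyncio
import concurrent.futures


def run_sync(coro_factory):
    """Run an async callable from sync code, tolerating an already-running loop."""
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        try:
            import nest_asyncio  # optional dependency

            nest_asyncio.apply()
            return loop.run_until_complete(coro_factory())
        except ImportError:
            # Without nest_asyncio, run the coroutine on a fresh loop in a worker thread.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(asyncio.run, coro_factory())
                return future.result()
    return asyncio.run(coro_factory())


async def demo() -> str:
    return "ok"


print(run_sync(demo))  # "ok"
```
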
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+from evalvault.config.settings import Settings
+from evalvault.ports.outbound.llm_factory_port import LLMFactoryPort
+from evalvault.ports.outbound.llm_port import LLMPort
+
+
+class SettingsLLMFactory(LLMFactoryPort):
+    def __init__(self, settings: Settings) -> None:
+        self._settings = settings
+
+    def create_faithfulness_fallback(
+        self,
+        active_provider: str | None,
+        active_model: str | None,
+    ) -> LLMPort | None:
+        provider, model = _resolve_faithfulness_fallback_config(
+            settings=self._settings,
+            active_provider=active_provider,
+            active_model=active_model,
+        )
+        if not provider or not model:
+            return None
+        return create_llm_adapter_for_model(provider, model, self._settings)
+
+
+def create_llm_adapter_for_model(
+    provider: str,
+    model_name: str,
+    base_settings: Settings,
+) -> LLMPort:
+    provider = provider.lower()
+
+    if provider == "openai":
+        base_settings.llm_provider = "openai"
+        base_settings.openai_model = model_name
+        from evalvault.adapters.outbound.llm.openai_adapter import OpenAIAdapter
+
+        return OpenAIAdapter(base_settings)
+    if provider == "ollama":
+        base_settings.llm_provider = "ollama"
+        base_settings.ollama_model = model_name
+        from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
+
+        return OllamaAdapter(base_settings)
+    if provider == "vllm":
+        base_settings.llm_provider = "vllm"
+        base_settings.vllm_model = model_name
+        from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
+
+        return VLLMAdapter(base_settings)
+    if provider == "azure":
+        base_settings.llm_provider = "azure"
+        base_settings.azure_deployment = model_name
+        from evalvault.adapters.outbound.llm.azure_adapter import AzureOpenAIAdapter
+
+        return AzureOpenAIAdapter(base_settings)
+    if provider == "anthropic":
+        base_settings.llm_provider = "anthropic"
+        base_settings.anthropic_model = model_name
+        from evalvault.adapters.outbound.llm.anthropic_adapter import AnthropicAdapter
+
+        return AnthropicAdapter(base_settings)
+
+    raise ValueError(
+        f"Unsupported LLM provider: '{provider}'. Supported: openai, ollama, vllm, azure, anthropic"
+    )
+
+
+def _resolve_faithfulness_fallback_config(
+    *,
+    settings: Settings,
+    active_provider: str | None,
+    active_model: str | None,
+) -> tuple[str | None, str | None]:
+    provider = (
+        settings.faithfulness_fallback_provider.strip().lower()
+        if settings.faithfulness_fallback_provider
+        else None
+    )
+    model = settings.faithfulness_fallback_model
+    normalized_active = active_provider.strip().lower() if active_provider else None
+    default_provider = normalized_active or settings.llm_provider.lower()
+
+    if not provider and model:
+        provider = default_provider
+    if provider and not model:
+        model = _default_faithfulness_fallback_model(provider)
+    if not provider and not model:
+        provider = default_provider
+        model = _default_faithfulness_fallback_model(default_provider)
+
+    if not provider or not model:
+        return None, None
+    return provider, model
+
+
+def _default_faithfulness_fallback_model(provider: str) -> str | None:
+    if provider == "ollama":
+        return "gpt-oss-safeguard:20b"
+    if provider == "vllm":
+        return "gpt-oss-120b"
+    return None
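
The new `factory.py` module centralizes what previously lived in `llm/__init__.py`: `create_llm_adapter_for_model` updates the shared `Settings` object and returns the provider-specific adapter, while `SettingsLLMFactory.create_faithfulness_fallback` resolves an optional fallback judge model from settings (presumably consumed by `RagasEvaluator` via the new `llm_factory` argument). A hedged usage sketch; the model name is a placeholder and `Settings()` is assumed to pick up credentials from the environment:

```python
# Sketch of wiring the factory, mirroring the CLI changes earlier in this diff.
from evalvault.adapters.outbound.llm.factory import (
    SettingsLLMFactory,
    create_llm_adapter_for_model,
)
from evalvault.config.settings import Settings

settings = Settings()  # assumption: provider credentials come from the environment/.env

# Direct adapter construction for an explicit provider/model pair (placeholder model name).
adapter = create_llm_adapter_for_model("openai", "gpt-4o-mini", settings)

# Factory path: resolve a faithfulness fallback judge, if one is configured or defaulted.
factory = SettingsLLMFactory(settings)
fallback = factory.create_faithfulness_fallback(
    active_provider=settings.llm_provider,
    active_model=None,
)
print(type(adapter).__name__, fallback is not None)
```
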
@@ -9,6 +9,29 @@ from evalvault.domain.services.entity_extractor import Entity, Relation
 from evalvault.ports.outbound.llm_port import LLMPort
 from evalvault.ports.outbound.relation_augmenter_port import RelationAugmenterPort

+_RELATION_SYSTEM_PROMPT_KO = (
+    "당신은 한국어 보험 문서의 지식 그래프 감사자입니다. "
+    "제공된 문서 스니펫을 보고 관계를 확인하거나 수정하세요."
+)
+_RELATION_SYSTEM_PROMPT_EN = (
+    "You are a knowledge graph auditor for Korean insurance documents. "
+    "Review the provided document snippet and confirm or fix the relations."
+)
+_RELATION_PROMPT_TEMPLATE_KO = (
+    "{system_prompt}\n"
+    "source, target, relation_type, confidence, justification 키를 포함한 JSON 배열만 반환하세요.\n\n"
+    "문서:\n{document_text}\n\n"
+    "엔티티:\n{entity_lines}\n\n"
+    "낮은 신뢰도 관계:\n{relation_lines}"
+)
+_RELATION_PROMPT_TEMPLATE_EN = (
+    "{system_prompt}\n"
+    "Return a JSON array of objects with keys source, target, relation_type, confidence, justification.\n\n"
+    "Document:\n{document_text}\n\n"
+    "Entities:\n{entity_lines}\n\n"
+    "Low-confidence relations:\n{relation_lines}"
+)
+

 class LLMRelationAugmenter(RelationAugmenterPort):
     """LLM을 사용해 저신뢰 관계를 검증/보강."""
@@ -18,13 +41,17 @@ class LLMRelationAugmenter(RelationAugmenterPort):
         llm_port: LLMPort,
         max_relations: int = 5,
         system_prompt: str | None = None,
+        language: str = "ko",
     ):
         self._llm_port = llm_port
         self._max_relations = max_relations
-        self._system_prompt = system_prompt or (
-            "You are a knowledge graph auditor for Korean insurance documents. "
-            "Review the provided document snippet and confirm or fix the relations."
-        )
+        self._language = language
+        if system_prompt:
+            self._system_prompt = system_prompt
+        else:
+            self._system_prompt = (
+                _RELATION_SYSTEM_PROMPT_EN if language == "en" else _RELATION_SYSTEM_PROMPT_KO
+            )

     def augment_relations(
         self,
30
57
  self,
@@ -75,16 +102,14 @@ class LLMRelationAugmenter(RelationAugmenterPort):
75
102
  f"- {rel.source} -> {rel.target} [{rel.relation_type}] conf={rel.confidence:.2f}"
76
103
  for rel in relations
77
104
  ]
78
- return (
79
- f"{self._system_prompt}\n"
80
- "Return a JSON array of objects with keys "
81
- "source, target, relation_type, confidence, justification.\n\n"
82
- "Document:\n"
83
- f"{document_text}\n\n"
84
- "Entities:\n"
85
- f"{chr(10).join(entity_lines)}\n\n"
86
- "Low-confidence relations:\n"
87
- f"{chr(10).join(relation_lines)}"
105
+ template = (
106
+ _RELATION_PROMPT_TEMPLATE_EN if self._language == "en" else _RELATION_PROMPT_TEMPLATE_KO
107
+ )
108
+ return template.format(
109
+ system_prompt=self._system_prompt,
110
+ document_text=document_text,
111
+ entity_lines=chr(10).join(entity_lines),
112
+ relation_lines=chr(10).join(relation_lines),
88
113
  )
89
114
 
90
115
  @staticmethod
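
With the prompt text hoisted into module-level KO/EN constants, the augmenter now selects its template from a `language` flag (default `"ko"`) unless an explicit `system_prompt` overrides it. A brief construction sketch; the `EchoLLM` stub is hypothetical, included only to make the call self-contained:

```python
# Sketch: selecting the English prompt template via the new language parameter.
from evalvault.adapters.outbound.llm.llm_relation_augmenter import LLMRelationAugmenter


class EchoLLM:
    """Hypothetical stand-in for an LLMPort implementation."""

    def generate_text(self, prompt: str, **kwargs) -> str:
        return "[]"  # pretend the model found nothing to fix


ko_augmenter = LLMRelationAugmenter(llm_port=EchoLLM())                 # Korean prompts (default)
en_augmenter = LLMRelationAugmenter(llm_port=EchoLLM(), language="en")  # English prompts
```
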
@@ -19,7 +19,7 @@ from evalvault.adapters.outbound.llm.base import BaseLLMAdapter
 from evalvault.adapters.outbound.llm.instructor_factory import create_instructor_llm
 from evalvault.adapters.outbound.llm.token_aware_chat import ThinkingTokenTrackingAsyncOpenAI
 from evalvault.config.settings import Settings
-from evalvault.ports.outbound.llm_port import ThinkingConfig
+from evalvault.ports.outbound.llm_port import GenerationOptions, ThinkingConfig


 class OllamaAdapter(BaseLLMAdapter):
@@ -240,7 +240,12 @@ class OllamaAdapter(BaseLLMAdapter):
         else:
             return asyncio.run(self.embed(texts, model, dimension))

-    async def agenerate_text(self, prompt: str) -> str:
+    async def agenerate_text(
+        self,
+        prompt: str,
+        *,
+        options: GenerationOptions | None = None,
+    ) -> str:
         """Generate text from a prompt (async).

         Uses the Ollama OpenAI-compatible API for simple text generation.
@@ -251,13 +256,30 @@ class OllamaAdapter(BaseLLMAdapter):
         Returns:
             Generated text string
         """
-        response = await self._embedding_client.chat.completions.create(
-            model=self._ollama_model,
-            messages=[{"role": "user", "content": prompt}],
-        )
+        api_kwargs: dict[str, Any] = {
+            "model": self._ollama_model,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+        if options and options.max_tokens is not None:
+            api_kwargs["max_completion_tokens"] = options.max_tokens
+        if options and options.temperature is not None:
+            api_kwargs["temperature"] = options.temperature
+        if options and options.top_p is not None:
+            api_kwargs["top_p"] = options.top_p
+        if options and options.n is not None:
+            api_kwargs["n"] = options.n
+        if options and options.seed is not None:
+            api_kwargs["seed"] = options.seed
+        response = await self._embedding_client.chat.completions.create(**api_kwargs)
         return response.choices[0].message.content or ""

-    def generate_text(self, prompt: str, *, json_mode: bool = False) -> str:
+    def generate_text(
+        self,
+        prompt: str,
+        *,
+        json_mode: bool = False,
+        options: GenerationOptions | None = None,
+    ) -> str:
         """Generate text from a prompt (sync).

         Args:
@@ -279,12 +301,14 @@ class OllamaAdapter(BaseLLMAdapter):
                 import nest_asyncio

                 nest_asyncio.apply()
-                return loop.run_until_complete(self.agenerate_text(prompt))
+                return loop.run_until_complete(self.agenerate_text(prompt, options=options))
             except ImportError:
                 import concurrent.futures

                 with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(asyncio.run, self.agenerate_text(prompt))
+                    future = executor.submit(
+                        asyncio.run, self.agenerate_text(prompt, options=options)
+                    )
                     return future.result()
         else:
-            return asyncio.run(self.agenerate_text(prompt))
+            return asyncio.run(self.agenerate_text(prompt, options=options))
@@ -10,6 +10,7 @@ from evalvault.adapters.outbound.llm.instructor_factory import create_instructor
 from evalvault.adapters.outbound.llm.token_aware_chat import TokenTrackingAsyncOpenAI
 from evalvault.config.phoenix_support import instrumentation_span, set_span_attributes
 from evalvault.config.settings import Settings
+from evalvault.ports.outbound.llm_port import GenerationOptions

 _DEFAULT_MAX_COMPLETION_TOKENS = 8192
 _GPT5_MAX_COMPLETION_TOKENS = 16384
@@ -73,7 +74,12 @@ class OpenAIAdapter(BaseLLMAdapter):
         """
         return self._embedding_model_name

-    async def agenerate_text(self, prompt: str) -> str:
+    async def agenerate_text(
+        self,
+        prompt: str,
+        *,
+        options: GenerationOptions | None = None,
+    ) -> str:
         """Generate text from a prompt (async).

         Uses the OpenAI chat completions API directly for simple text generation.
@@ -89,18 +95,35 @@ class OpenAIAdapter(BaseLLMAdapter):
             "llm.model": self._model_name,
             "llm.mode": "async",
         }
+        max_tokens = options.max_tokens if options and options.max_tokens is not None else None
+        api_kwargs = {
+            "model": self._model_name,
+            "messages": [{"role": "user", "content": prompt}],
+            "max_completion_tokens": max_tokens
+            or _max_completion_tokens_for_model(self._model_name),
+        }
+        if options and options.temperature is not None:
+            api_kwargs["temperature"] = options.temperature
+        if options and options.top_p is not None:
+            api_kwargs["top_p"] = options.top_p
+        if options and options.n is not None:
+            api_kwargs["n"] = options.n
+        if options and options.seed is not None:
+            api_kwargs["seed"] = options.seed
         with instrumentation_span("llm.generate_text", attrs) as span:
-            response = await self._client.chat.completions.create(
-                model=self._model_name,
-                messages=[{"role": "user", "content": prompt}],
-                max_completion_tokens=_max_completion_tokens_for_model(self._model_name),
-            )
+            response = await self._client.chat.completions.create(**api_kwargs)
             content = response.choices[0].message.content or ""
             if span:
                 set_span_attributes(span, {"llm.response.length": len(content)})
             return content

-    def generate_text(self, prompt: str, *, json_mode: bool = False) -> str:
+    def generate_text(
+        self,
+        prompt: str,
+        *,
+        json_mode: bool = False,
+        options: GenerationOptions | None = None,
+    ) -> str:
         """Generate text from a prompt (sync).

         Uses sync OpenAI client directly.
@@ -124,11 +147,21 @@ class OpenAIAdapter(BaseLLMAdapter):
         sync_client = OpenAI(**client_kwargs)

         # API 호출 파라미터
+        max_tokens = options.max_tokens if options and options.max_tokens is not None else None
         api_kwargs: dict = {
             "model": self._model_name,
             "messages": [{"role": "user", "content": prompt}],
-            "max_completion_tokens": _max_completion_tokens_for_model(self._model_name),
+            "max_completion_tokens": max_tokens
+            or _max_completion_tokens_for_model(self._model_name),
         }
+        if options and options.temperature is not None:
+            api_kwargs["temperature"] = options.temperature
+        if options and options.top_p is not None:
+            api_kwargs["top_p"] = options.top_p
+        if options and options.n is not None:
+            api_kwargs["n"] = options.n
+        if options and options.seed is not None:
+            api_kwargs["seed"] = options.seed

         # JSON 모드 설정
         if json_mode:
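
Across the OpenAI, Ollama, and Anthropic adapters in this release, `generate_text` / `agenerate_text` now accept a keyword-only `options` argument and forward any populated fields (`max_tokens`, `temperature`, `top_p`, `n`, `seed`) to the underlying API, falling back to the per-model defaults otherwise. A hedged call sketch; the constructor of `GenerationOptions` (defined in `llm_port.py`, whose diff is not shown here) is assumed to take these fields as keyword arguments:

```python
# Sketch: passing sampling options through the new keyword-only parameter.
# Assumption: GenerationOptions accepts these fields as keyword arguments
# (inferred from the attributes the adapters read: max_tokens, temperature, top_p, n, seed).
from evalvault.adapters.outbound.llm import get_llm_adapter
from evalvault.config.settings import Settings
from evalvault.ports.outbound.llm_port import GenerationOptions

settings = Settings()
adapter = get_llm_adapter(settings)

options = GenerationOptions(max_tokens=512, temperature=0.2, top_p=0.9, seed=42)
text = adapter.generate_text("Summarize the policy exclusions.", options=options)

# Omitting options preserves the previous behaviour (per-model max_completion_tokens defaults).
text_default = adapter.generate_text("Summarize the policy exclusions.")
```
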