evalvault 1.73.2__py3-none-any.whl → 1.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +66 -17
- evalvault/adapters/inbound/api/routers/calibration.py +9 -9
- evalvault/adapters/inbound/api/routers/chat.py +604 -37
- evalvault/adapters/inbound/api/routers/domain.py +10 -5
- evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
- evalvault/adapters/inbound/api/routers/runs.py +23 -4
- evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
- evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
- evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
- evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
- evalvault/adapters/inbound/cli/commands/compare.py +2 -7
- evalvault/adapters/inbound/cli/commands/debug.py +3 -2
- evalvault/adapters/inbound/cli/commands/domain.py +12 -12
- evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
- evalvault/adapters/inbound/cli/commands/gate.py +3 -2
- evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
- evalvault/adapters/inbound/cli/commands/history.py +3 -12
- evalvault/adapters/inbound/cli/commands/method.py +1 -2
- evalvault/adapters/inbound/cli/commands/ops.py +2 -2
- evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
- evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
- evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
- evalvault/adapters/inbound/cli/commands/regress.py +5 -4
- evalvault/adapters/inbound/cli/commands/run.py +42 -31
- evalvault/adapters/inbound/cli/commands/run_helpers.py +24 -15
- evalvault/adapters/inbound/cli/commands/stage.py +6 -25
- evalvault/adapters/inbound/cli/utils/options.py +10 -4
- evalvault/adapters/inbound/mcp/tools.py +11 -8
- evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
- evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
- evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
- evalvault/adapters/outbound/domain_memory/factory.py +68 -0
- evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
- evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
- evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
- evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
- evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
- evalvault/adapters/outbound/ops/__init__.py +5 -0
- evalvault/adapters/outbound/ops/report_renderer.py +159 -0
- evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
- evalvault/adapters/outbound/storage/base_sql.py +3 -2
- evalvault/adapters/outbound/storage/factory.py +53 -0
- evalvault/adapters/outbound/storage/postgres_adapter.py +90 -0
- evalvault/adapters/outbound/storage/postgres_schema.sql +15 -0
- evalvault/adapters/outbound/storage/schema.sql +14 -0
- evalvault/adapters/outbound/storage/sqlite_adapter.py +77 -0
- evalvault/config/settings.py +31 -7
- evalvault/domain/entities/ops_report.py +40 -0
- evalvault/domain/services/domain_learning_hook.py +2 -1
- evalvault/domain/services/ops_report_service.py +192 -0
- evalvault/ports/inbound/web_port.py +3 -1
- evalvault/ports/outbound/storage_port.py +2 -0
- evalvault-1.75.0.dist-info/METADATA +221 -0
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/RECORD +57 -48
- evalvault-1.73.2.dist-info/METADATA +0 -585
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/WHEEL +0 -0
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -10,7 +10,7 @@ import typer
|
|
|
10
10
|
from rich.console import Console
|
|
11
11
|
|
|
12
12
|
from evalvault.adapters.outbound.filesystem.difficulty_profile_writer import DifficultyProfileWriter
|
|
13
|
-
from evalvault.adapters.outbound.storage.
|
|
13
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
14
14
|
from evalvault.config.settings import Settings
|
|
15
15
|
from evalvault.domain.services.difficulty_profile_reporter import DifficultyProfileReporter
|
|
16
16
|
from evalvault.domain.services.difficulty_profiling_service import (
|
|
@@ -58,7 +58,7 @@ def register_profile_difficulty_commands(
|
|
|
58
58
|
concurrency: int | None = typer.Option(
|
|
59
59
|
None, "--concurrency", help="Max concurrency when parallel is enabled.", min=1
|
|
60
60
|
),
|
|
61
|
-
db_path: Path | None = db_option(help_text="
|
|
61
|
+
db_path: Path | None = db_option(help_text="DB path."),
|
|
62
62
|
) -> None:
|
|
63
63
|
if not dataset_name and not run_id:
|
|
64
64
|
print_cli_error(
|
|
@@ -75,15 +75,6 @@ def register_profile_difficulty_commands(
|
|
|
75
75
|
)
|
|
76
76
|
raise typer.Exit(1)
|
|
77
77
|
|
|
78
|
-
resolved_db_path = db_path or Settings().evalvault_db_path
|
|
79
|
-
if resolved_db_path is None:
|
|
80
|
-
print_cli_error(
|
|
81
|
-
console,
|
|
82
|
-
"DB 경로가 필요합니다.",
|
|
83
|
-
fixes=["--db 옵션으로 SQLite DB 경로를 지정하세요."],
|
|
84
|
-
)
|
|
85
|
-
raise typer.Exit(1)
|
|
86
|
-
|
|
87
78
|
metric_list = parse_csv_option(metrics)
|
|
88
79
|
if metric_list:
|
|
89
80
|
validate_choices(metric_list, available_metrics, console, value_label="metric")
|
|
@@ -94,7 +85,7 @@ def register_profile_difficulty_commands(
|
|
|
94
85
|
resolved_output = output_path or Path("reports") / "difficulty" / f"{prefix}.json"
|
|
95
86
|
resolved_artifacts_dir = artifacts_dir or resolved_output.parent / "artifacts" / prefix
|
|
96
87
|
|
|
97
|
-
storage =
|
|
88
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
98
89
|
writer = DifficultyProfileWriter()
|
|
99
90
|
reporter = DifficultyProfileReporter(writer)
|
|
100
91
|
service = DifficultyProfilingService(storage=storage, reporter=reporter)
|
|
@@ -13,7 +13,7 @@ from rich.table import Table
|
|
|
13
13
|
|
|
14
14
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
15
15
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
16
|
-
from evalvault.adapters.outbound.storage.
|
|
16
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
17
17
|
from evalvault.config.settings import Settings, apply_profile
|
|
18
18
|
from evalvault.domain.entities import Dataset, EvaluationRun, PromptSetBundle, TestCase
|
|
19
19
|
from evalvault.domain.services.evaluator import RagasEvaluator
|
|
@@ -42,17 +42,6 @@ def _bundle_to_role_map(bundle: PromptSetBundle) -> dict[str, dict[str, str]]:
|
|
|
42
42
|
return roles
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def _require_db_path(console: Console, db_path: Path | None) -> Path:
|
|
46
|
-
if db_path is None:
|
|
47
|
-
print_cli_error(
|
|
48
|
-
console,
|
|
49
|
-
"DB 경로가 필요합니다.",
|
|
50
|
-
fixes=["--db 옵션으로 SQLite DB 경로를 지정하세요."],
|
|
51
|
-
)
|
|
52
|
-
raise typer.Exit(1)
|
|
53
|
-
return db_path
|
|
54
|
-
|
|
55
|
-
|
|
56
45
|
def _default_role(bundle: PromptSetBundle) -> str | None:
|
|
57
46
|
for item in bundle.items:
|
|
58
47
|
if item.role == "system":
|
|
@@ -229,8 +218,7 @@ def create_prompts_app(console: Console) -> typer.Typer:
|
|
|
229
218
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
230
219
|
) -> None:
|
|
231
220
|
"""Show prompt snapshots attached to a run."""
|
|
232
|
-
|
|
233
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db)
|
|
221
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
234
222
|
bundle = storage.get_prompt_set_for_run(run_id)
|
|
235
223
|
if not bundle:
|
|
236
224
|
console.print("[yellow]No prompt set found for this run.[/yellow]")
|
|
@@ -273,8 +261,7 @@ def create_prompts_app(console: Console) -> typer.Typer:
|
|
|
273
261
|
),
|
|
274
262
|
) -> None:
|
|
275
263
|
"""Compare prompt snapshots between two runs."""
|
|
276
|
-
|
|
277
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db)
|
|
264
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
278
265
|
bundle_a = storage.get_prompt_set_for_run(run_id_a)
|
|
279
266
|
bundle_b = storage.get_prompt_set_for_run(run_id_b)
|
|
280
267
|
|
|
@@ -462,8 +449,7 @@ def create_prompts_app(console: Console) -> typer.Typer:
|
|
|
462
449
|
) -> None:
|
|
463
450
|
"""Suggest prompt improvements by scoring candidate prompts."""
|
|
464
451
|
|
|
465
|
-
|
|
466
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db)
|
|
452
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
467
453
|
|
|
468
454
|
try:
|
|
469
455
|
run = storage.get_run(run_id)
|
|
@@ -20,7 +20,8 @@ from evalvault.adapters.outbound.report.ci_report_formatter import (
|
|
|
20
20
|
from evalvault.adapters.outbound.report.pr_comment_formatter import (
|
|
21
21
|
format_ci_gate_pr_comment,
|
|
22
22
|
)
|
|
23
|
-
from evalvault.adapters.outbound.storage.
|
|
23
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
24
|
+
from evalvault.config.settings import Settings
|
|
24
25
|
from evalvault.domain.services.regression_gate_service import (
|
|
25
26
|
RegressionGateReport,
|
|
26
27
|
RegressionGateService,
|
|
@@ -133,7 +134,7 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
|
|
|
133
134
|
validate_choice(test, ["t-test", "mann-whitney"], console, value_label="test")
|
|
134
135
|
metric_list = parse_csv_option(metrics)
|
|
135
136
|
|
|
136
|
-
storage =
|
|
137
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
137
138
|
analysis_adapter = StatisticalAnalysisAdapter()
|
|
138
139
|
service = RegressionGateService(storage=storage, analysis_adapter=analysis_adapter)
|
|
139
140
|
|
|
@@ -243,7 +244,7 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
|
|
|
243
244
|
value_label="format",
|
|
244
245
|
)
|
|
245
246
|
|
|
246
|
-
storage =
|
|
247
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
247
248
|
analysis_adapter = StatisticalAnalysisAdapter()
|
|
248
249
|
service = RegressionGateService(storage=storage, analysis_adapter=analysis_adapter)
|
|
249
250
|
|
|
@@ -414,7 +415,7 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
|
|
|
414
415
|
console.print("[red]Error:[/red] Database path is not configured.")
|
|
415
416
|
raise typer.Exit(1)
|
|
416
417
|
|
|
417
|
-
storage =
|
|
418
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
418
419
|
|
|
419
420
|
if action == "set":
|
|
420
421
|
if not run_id:
|
|
@@ -21,7 +21,7 @@ from evalvault.adapters.outbound.dataset import get_loader, load_multiturn_datas
|
|
|
21
21
|
from evalvault.adapters.outbound.documents.versioned_loader import (
|
|
22
22
|
load_versioned_chunks_from_pdf_dir,
|
|
23
23
|
)
|
|
24
|
-
from evalvault.adapters.outbound.domain_memory
|
|
24
|
+
from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
|
|
25
25
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
26
26
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
27
27
|
from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
@@ -30,7 +30,7 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
|
30
30
|
PhoenixSyncService,
|
|
31
31
|
build_experiment_metadata,
|
|
32
32
|
)
|
|
33
|
-
from evalvault.adapters.outbound.storage.
|
|
33
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
34
34
|
from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
|
|
35
35
|
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
36
36
|
from evalvault.config.settings import Settings, apply_profile
|
|
@@ -57,6 +57,7 @@ from evalvault.domain.services.ragas_prompt_overrides import (
|
|
|
57
57
|
)
|
|
58
58
|
from evalvault.domain.services.retriever_context import apply_versioned_retriever_to_dataset
|
|
59
59
|
from evalvault.domain.services.stage_event_builder import StageEventBuilder
|
|
60
|
+
from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort
|
|
60
61
|
from evalvault.ports.outbound.korean_nlp_port import RetrieverPort
|
|
61
62
|
|
|
62
63
|
from ..utils.analysis_io import (
|
|
@@ -111,7 +112,7 @@ def _build_dense_retriever(
|
|
|
111
112
|
settings: Settings,
|
|
112
113
|
profile_name: str | None,
|
|
113
114
|
) -> Any:
|
|
114
|
-
"""Build and index a dense retriever, preferring
|
|
115
|
+
"""Build and index a dense retriever, preferring OpenAI-compatible embeddings when available."""
|
|
115
116
|
|
|
116
117
|
from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
|
|
117
118
|
|
|
@@ -135,6 +136,17 @@ def _build_dense_retriever(
|
|
|
135
136
|
dense_retriever.index(documents)
|
|
136
137
|
return dense_retriever
|
|
137
138
|
|
|
139
|
+
if settings.llm_provider == "vllm":
|
|
140
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
141
|
+
|
|
142
|
+
adapter = VLLMAdapter(settings)
|
|
143
|
+
dense_retriever = KoreanDenseRetriever(
|
|
144
|
+
model_name=settings.vllm_embedding_model,
|
|
145
|
+
ollama_adapter=adapter,
|
|
146
|
+
)
|
|
147
|
+
dense_retriever.index(documents)
|
|
148
|
+
return dense_retriever
|
|
149
|
+
|
|
138
150
|
try:
|
|
139
151
|
dense_retriever = KoreanDenseRetriever()
|
|
140
152
|
dense_retriever.index(documents)
|
|
@@ -142,7 +154,8 @@ def _build_dense_retriever(
|
|
|
142
154
|
except Exception as exc:
|
|
143
155
|
raise RuntimeError(
|
|
144
156
|
"Dense retriever initialization failed. "
|
|
145
|
-
"Use --profile dev/prod (Ollama embedding),
|
|
157
|
+
"Use --profile dev/prod (Ollama embedding), --profile vllm (vLLM embedding), "
|
|
158
|
+
"or install/prepare a local embedding model."
|
|
146
159
|
) from exc
|
|
147
160
|
|
|
148
161
|
|
|
@@ -1729,7 +1742,7 @@ def register_run_commands(
|
|
|
1729
1742
|
|
|
1730
1743
|
assert llm_adapter is not None
|
|
1731
1744
|
|
|
1732
|
-
memory_adapter:
|
|
1745
|
+
memory_adapter: DomainMemoryPort | None = None
|
|
1733
1746
|
memory_evaluator: MemoryAwareEvaluator | None = None
|
|
1734
1747
|
memory_domain_name = memory_domain or ds.metadata.get("domain") or "default"
|
|
1735
1748
|
memory_required = domain_memory_requested
|
|
@@ -1751,8 +1764,15 @@ def register_run_commands(
|
|
|
1751
1764
|
f"Domain Memory 초기화 시작 (domain={memory_domain_name}, lang={memory_language})",
|
|
1752
1765
|
)
|
|
1753
1766
|
try:
|
|
1754
|
-
|
|
1755
|
-
|
|
1767
|
+
if memory_db:
|
|
1768
|
+
memory_db_path = memory_db
|
|
1769
|
+
elif settings.db_backend == "sqlite":
|
|
1770
|
+
memory_db_path = settings.evalvault_memory_db_path
|
|
1771
|
+
else:
|
|
1772
|
+
memory_db_path = None
|
|
1773
|
+
memory_adapter = build_domain_memory_adapter(
|
|
1774
|
+
settings=settings, db_path=Path(memory_db_path) if memory_db_path else None
|
|
1775
|
+
)
|
|
1756
1776
|
memory_evaluator = MemoryAwareEvaluator(
|
|
1757
1777
|
evaluator=evaluator,
|
|
1758
1778
|
memory_port=memory_adapter,
|
|
@@ -2161,16 +2181,9 @@ def register_run_commands(
|
|
|
2161
2181
|
stored = _write_stage_events_jsonl(stage_events, stage_event_payload)
|
|
2162
2182
|
console.print(f"[green]Saved {stored} stage event(s).[/green]")
|
|
2163
2183
|
if stage_store:
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
2168
|
-
else:
|
|
2169
|
-
print_cli_warning(
|
|
2170
|
-
console,
|
|
2171
|
-
"Stage 이벤트를 저장하려면 --db 경로가 필요합니다.",
|
|
2172
|
-
tips=["--db <sqlite_path> 옵션을 함께 지정하세요."],
|
|
2173
|
-
)
|
|
2184
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2185
|
+
stored = storage.save_stage_events(stage_event_payload)
|
|
2186
|
+
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
2174
2187
|
|
|
2175
2188
|
if effective_tracker != "none":
|
|
2176
2189
|
phoenix_opts = None
|
|
@@ -2194,23 +2207,21 @@ def register_run_commands(
|
|
|
2194
2207
|
log_phoenix_traces_fn=log_phoenix_traces,
|
|
2195
2208
|
)
|
|
2196
2209
|
_log_duration(console, verbose, "Tracker 로깅 완료", tracker_started_at)
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
)
|
|
2208
|
-
_log_duration(console, verbose, "DB 저장 완료", db_started_at)
|
|
2210
|
+
db_started_at = datetime.now()
|
|
2211
|
+
_log_timestamp(console, verbose, "DB 저장 시작")
|
|
2212
|
+
_save_to_db(
|
|
2213
|
+
db_path,
|
|
2214
|
+
result,
|
|
2215
|
+
console,
|
|
2216
|
+
prompt_bundle=prompt_bundle,
|
|
2217
|
+
export_excel=excel_output is None,
|
|
2218
|
+
)
|
|
2219
|
+
_log_duration(console, verbose, "DB 저장 완료", db_started_at)
|
|
2209
2220
|
if excel_output:
|
|
2210
2221
|
excel_started_at = datetime.now()
|
|
2211
2222
|
_log_timestamp(console, verbose, f"엑셀 저장 시작 ({excel_output})")
|
|
2212
2223
|
try:
|
|
2213
|
-
storage =
|
|
2224
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2214
2225
|
storage.export_run_to_excel(result.run_id, excel_output)
|
|
2215
2226
|
console.print(f"[green]Excel export saved: {excel_output}[/green]")
|
|
2216
2227
|
except Exception as exc:
|
|
@@ -2242,7 +2253,7 @@ def register_run_commands(
|
|
|
2242
2253
|
prefix=analysis_prefix,
|
|
2243
2254
|
)
|
|
2244
2255
|
console.print("\n[bold]자동 분석 실행[/bold]")
|
|
2245
|
-
storage =
|
|
2256
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2246
2257
|
pipeline_service = build_analysis_pipeline_service(
|
|
2247
2258
|
storage=storage,
|
|
2248
2259
|
llm_adapter=llm_adapter,
|
|
@@ -18,7 +18,8 @@ from rich.table import Table
|
|
|
18
18
|
from evalvault.adapters.outbound.dataset import StreamingConfig, StreamingDatasetLoader
|
|
19
19
|
from evalvault.adapters.outbound.dataset.thresholds import extract_thresholds_from_rows
|
|
20
20
|
from evalvault.adapters.outbound.kg.networkx_adapter import NetworkXKnowledgeGraph
|
|
21
|
-
from evalvault.adapters.outbound.storage.
|
|
21
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
22
|
+
from evalvault.adapters.outbound.storage.postgres_adapter import PostgreSQLStorageAdapter
|
|
22
23
|
from evalvault.config.phoenix_support import (
|
|
23
24
|
get_phoenix_trace_url,
|
|
24
25
|
instrumentation_span,
|
|
@@ -454,18 +455,21 @@ def _log_to_tracker(
|
|
|
454
455
|
|
|
455
456
|
|
|
456
457
|
def _save_to_db(
|
|
457
|
-
db_path: Path,
|
|
458
|
+
db_path: Path | None,
|
|
458
459
|
result,
|
|
459
460
|
console: Console,
|
|
460
461
|
*,
|
|
461
|
-
storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
|
|
462
462
|
prompt_bundle: PromptSetBundle | None = None,
|
|
463
463
|
export_excel: bool = True,
|
|
464
464
|
) -> None:
|
|
465
|
-
"""Persist evaluation run (and optional prompt set) to
|
|
466
|
-
|
|
465
|
+
"""Persist evaluation run (and optional prompt set) to database."""
|
|
466
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
467
|
+
storage_label = (
|
|
468
|
+
"PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
|
|
469
|
+
)
|
|
470
|
+
export_base = db_path.parent if db_path else Path("data/exports")
|
|
471
|
+
with console.status(f"[bold green]Saving to database {storage_label}..."):
|
|
467
472
|
try:
|
|
468
|
-
storage = storage_cls(db_path=db_path)
|
|
469
473
|
if prompt_bundle:
|
|
470
474
|
storage.save_prompt_set(prompt_bundle)
|
|
471
475
|
storage.save_run(result)
|
|
@@ -475,7 +479,8 @@ def _save_to_db(
|
|
|
475
479
|
prompt_bundle.prompt_set.prompt_set_id,
|
|
476
480
|
)
|
|
477
481
|
if export_excel:
|
|
478
|
-
|
|
482
|
+
export_base.mkdir(parents=True, exist_ok=True)
|
|
483
|
+
excel_path = export_base / f"evalvault_run_{result.run_id}.xlsx"
|
|
479
484
|
try:
|
|
480
485
|
storage.export_run_to_excel(result.run_id, excel_path)
|
|
481
486
|
console.print(f"[green]Excel export saved: {excel_path}[/green]")
|
|
@@ -485,7 +490,7 @@ def _save_to_db(
|
|
|
485
490
|
"엑셀 내보내기에 실패했습니다.",
|
|
486
491
|
tips=[str(exc)],
|
|
487
492
|
)
|
|
488
|
-
console.print(f"[green]Results saved to database: {
|
|
493
|
+
console.print(f"[green]Results saved to database: {storage_label}[/green]")
|
|
489
494
|
console.print(f"[dim]Run ID: {result.run_id}[/dim]")
|
|
490
495
|
if prompt_bundle:
|
|
491
496
|
console.print(
|
|
@@ -502,21 +507,24 @@ def _save_to_db(
|
|
|
502
507
|
|
|
503
508
|
|
|
504
509
|
def _save_multiturn_to_db(
|
|
505
|
-
db_path: Path,
|
|
510
|
+
db_path: Path | None,
|
|
506
511
|
run_record: MultiTurnRunRecord,
|
|
507
512
|
conversations: list[MultiTurnConversationRecord],
|
|
508
513
|
turn_results: list[MultiTurnTurnResult],
|
|
509
514
|
console: Console,
|
|
510
515
|
*,
|
|
511
|
-
storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
|
|
512
516
|
export_excel: bool = True,
|
|
513
517
|
excel_output_path: Path | None = None,
|
|
514
518
|
metric_thresholds: dict[str, float] | None = None,
|
|
515
519
|
) -> None:
|
|
516
|
-
"""Persist multiturn evaluation run to
|
|
517
|
-
|
|
520
|
+
"""Persist multiturn evaluation run to database."""
|
|
521
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
522
|
+
storage_label = (
|
|
523
|
+
"PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
|
|
524
|
+
)
|
|
525
|
+
export_base = db_path.parent if db_path else Path("data/exports")
|
|
526
|
+
with console.status(f"[bold green]Saving multiturn run to {storage_label}..."):
|
|
518
527
|
try:
|
|
519
|
-
storage = storage_cls(db_path=db_path)
|
|
520
528
|
storage.save_multiturn_run(
|
|
521
529
|
run_record,
|
|
522
530
|
conversations,
|
|
@@ -524,8 +532,9 @@ def _save_multiturn_to_db(
|
|
|
524
532
|
metric_thresholds=metric_thresholds,
|
|
525
533
|
)
|
|
526
534
|
if export_excel:
|
|
535
|
+
export_base.mkdir(parents=True, exist_ok=True)
|
|
527
536
|
excel_path = excel_output_path or (
|
|
528
|
-
|
|
537
|
+
export_base / f"evalvault_multiturn_{run_record.run_id}.xlsx"
|
|
529
538
|
)
|
|
530
539
|
try:
|
|
531
540
|
storage.export_multiturn_run_to_excel(run_record.run_id, excel_path)
|
|
@@ -536,7 +545,7 @@ def _save_multiturn_to_db(
|
|
|
536
545
|
"멀티턴 엑셀 내보내기에 실패했습니다.",
|
|
537
546
|
tips=[str(exc)],
|
|
538
547
|
)
|
|
539
|
-
console.print(f"[green]Multiturn results saved to database: {
|
|
548
|
+
console.print(f"[green]Multiturn results saved to database: {storage_label}[/green]")
|
|
540
549
|
console.print(f"[dim]Run ID: {run_record.run_id}[/dim]")
|
|
541
550
|
except Exception as exc: # pragma: no cover - persistence errors
|
|
542
551
|
print_cli_error(
|
|
@@ -16,7 +16,7 @@ from rich.table import Table
|
|
|
16
16
|
from evalvault.adapters.outbound.improvement.stage_metric_playbook_loader import (
|
|
17
17
|
StageMetricPlaybookLoader,
|
|
18
18
|
)
|
|
19
|
-
from evalvault.adapters.outbound.storage.
|
|
19
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
20
20
|
from evalvault.config.settings import Settings
|
|
21
21
|
from evalvault.domain.entities.stage import REQUIRED_STAGE_TYPES, StageEvent, StageMetric
|
|
22
22
|
from evalvault.domain.services.stage_metric_guide_service import StageMetricGuideService
|
|
@@ -28,13 +28,6 @@ from ..utils.options import db_option
|
|
|
28
28
|
logger = logging.getLogger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def _resolve_db_path(db_path: Path | None) -> Path:
|
|
32
|
-
resolved = db_path or Settings().evalvault_db_path
|
|
33
|
-
if resolved is None:
|
|
34
|
-
raise typer.BadParameter("Database path is not configured.")
|
|
35
|
-
return resolved
|
|
36
|
-
|
|
37
|
-
|
|
38
31
|
@dataclass
|
|
39
32
|
class ValidationStats:
|
|
40
33
|
"""Tracks StageEvent validation failures by error type."""
|
|
@@ -122,8 +115,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
122
115
|
console.print("[yellow]No valid stage events found in the input file.[/yellow]")
|
|
123
116
|
raise typer.Exit(1)
|
|
124
117
|
|
|
125
|
-
|
|
126
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
118
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
127
119
|
stored = storage.save_stage_events(events)
|
|
128
120
|
|
|
129
121
|
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
@@ -147,8 +139,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
147
139
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
148
140
|
) -> None:
|
|
149
141
|
"""List stage events for a run."""
|
|
150
|
-
|
|
151
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
142
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
152
143
|
events = storage.list_stage_events(run_id, stage_type=stage_type)
|
|
153
144
|
|
|
154
145
|
if not events:
|
|
@@ -184,8 +175,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
184
175
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
185
176
|
) -> None:
|
|
186
177
|
"""Show summary stats for stage events."""
|
|
187
|
-
|
|
188
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
178
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
189
179
|
events = storage.list_stage_events(run_id)
|
|
190
180
|
if not events:
|
|
191
181
|
console.print("[yellow]No stage events found.[/yellow]")
|
|
@@ -218,8 +208,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
218
208
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
219
209
|
) -> None:
|
|
220
210
|
"""Compute stage metrics from stored events."""
|
|
221
|
-
|
|
222
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
211
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
223
212
|
events = storage.list_stage_events(run_id)
|
|
224
213
|
if not events:
|
|
225
214
|
console.print("[yellow]No stage events found.[/yellow]")
|
|
@@ -276,8 +265,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
276
265
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
277
266
|
) -> None:
|
|
278
267
|
"""Report stage summary, metrics, and improvement guides."""
|
|
279
|
-
|
|
280
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
268
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
281
269
|
events = storage.list_stage_events(run_id)
|
|
282
270
|
if not events:
|
|
283
271
|
console.print("[yellow]No stage events found.[/yellow]")
|
|
@@ -547,13 +535,6 @@ def _load_default_profile() -> str | None:
|
|
|
547
535
|
return None
|
|
548
536
|
|
|
549
537
|
|
|
550
|
-
def _resolve_db_path(db_path: Path | None) -> Path:
|
|
551
|
-
resolved = db_path or Settings().evalvault_db_path
|
|
552
|
-
if resolved is None:
|
|
553
|
-
raise typer.BadParameter("Database path is not configured.")
|
|
554
|
-
return resolved
|
|
555
|
-
|
|
556
|
-
|
|
557
538
|
def _print_stage_summary(console: Console, summary_data) -> None:
|
|
558
539
|
table = Table(show_header=True, header_style="bold cyan")
|
|
559
540
|
table.add_column("Stage Type")
|
|
@@ -31,11 +31,11 @@ def profile_option(
|
|
|
31
31
|
def db_option(
|
|
32
32
|
*,
|
|
33
33
|
default: str | Path | None = _UNSET,
|
|
34
|
-
help_text: str = "
|
|
34
|
+
help_text: str = "SQLite DB path (PostgreSQL is default when omitted).",
|
|
35
35
|
) -> Path | None:
|
|
36
36
|
"""Shared --db / -D option definition."""
|
|
37
37
|
|
|
38
|
-
resolved_default =
|
|
38
|
+
resolved_default = None if default is _UNSET else default
|
|
39
39
|
normalized_default = _normalize_path(resolved_default)
|
|
40
40
|
return typer.Option(
|
|
41
41
|
normalized_default,
|
|
@@ -49,11 +49,17 @@ def db_option(
|
|
|
49
49
|
def memory_db_option(
|
|
50
50
|
*,
|
|
51
51
|
default: str | Path | None = _UNSET,
|
|
52
|
-
help_text: str = "
|
|
52
|
+
help_text: str = "Domain Memory SQLite path (Postgres is default when omitted).",
|
|
53
53
|
) -> Path | None:
|
|
54
54
|
"""Shared option factory for the domain memory database path."""
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
if default is _UNSET:
|
|
57
|
+
settings = Settings()
|
|
58
|
+
resolved_default = (
|
|
59
|
+
settings.evalvault_memory_db_path if settings.db_backend == "sqlite" else None
|
|
60
|
+
)
|
|
61
|
+
else:
|
|
62
|
+
resolved_default = default
|
|
57
63
|
normalized_default = _normalize_path(resolved_default)
|
|
58
64
|
return typer.Option(
|
|
59
65
|
normalized_default,
|
|
@@ -20,12 +20,13 @@ from evalvault.adapters.outbound.analysis.pipeline_factory import build_analysis
|
|
|
20
20
|
from evalvault.adapters.outbound.analysis.statistical_adapter import StatisticalAnalysisAdapter
|
|
21
21
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
22
22
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
23
|
-
from evalvault.adapters.outbound.storage.
|
|
23
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
24
24
|
from evalvault.config.settings import Settings, apply_profile
|
|
25
25
|
from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
|
|
26
26
|
from evalvault.domain.services.analysis_service import AnalysisService
|
|
27
27
|
from evalvault.domain.services.evaluator import RagasEvaluator
|
|
28
28
|
from evalvault.ports.inbound.web_port import EvalRequest, RunFilters, RunSummary
|
|
29
|
+
from evalvault.ports.outbound.storage_port import StoragePort
|
|
29
30
|
|
|
30
31
|
from .schemas import (
|
|
31
32
|
AnalyzeCompareRequest,
|
|
@@ -82,7 +83,7 @@ def list_runs(payload: dict[str, Any] | ListRunsRequest) -> ListRunsResponse:
|
|
|
82
83
|
errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)]
|
|
83
84
|
)
|
|
84
85
|
|
|
85
|
-
storage =
|
|
86
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
86
87
|
adapter = WebUIAdapter(storage=storage, settings=Settings())
|
|
87
88
|
|
|
88
89
|
filters = RunFilters(
|
|
@@ -123,7 +124,7 @@ def get_run_summary(payload: dict[str, Any] | GetRunSummaryRequest) -> GetRunSum
|
|
|
123
124
|
errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)]
|
|
124
125
|
)
|
|
125
126
|
|
|
126
|
-
storage =
|
|
127
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
127
128
|
try:
|
|
128
129
|
run = storage.get_run(request.run_id)
|
|
129
130
|
except KeyError as exc:
|
|
@@ -175,7 +176,7 @@ def run_evaluation(payload: dict[str, Any] | RunEvaluationRequest) -> RunEvaluat
|
|
|
175
176
|
errors=[_error("EVAL_LLM_INIT_FAILED", str(exc), stage=ErrorStage.evaluate)],
|
|
176
177
|
)
|
|
177
178
|
|
|
178
|
-
storage =
|
|
179
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
179
180
|
llm_factory = SettingsLLMFactory(settings)
|
|
180
181
|
korean_toolkit = try_create_korean_toolkit()
|
|
181
182
|
evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
|
|
@@ -266,7 +267,7 @@ def analyze_compare(payload: dict[str, Any] | AnalyzeCompareRequest) -> AnalyzeC
|
|
|
266
267
|
errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)],
|
|
267
268
|
)
|
|
268
269
|
|
|
269
|
-
storage =
|
|
270
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
270
271
|
try:
|
|
271
272
|
run_a = storage.get_run(request.run_id_a)
|
|
272
273
|
run_b = storage.get_run(request.run_id_b)
|
|
@@ -503,9 +504,11 @@ def _serialize_run_summary(summary: RunSummary) -> RunSummaryPayload:
|
|
|
503
504
|
return RunSummaryPayload.model_validate(payload)
|
|
504
505
|
|
|
505
506
|
|
|
506
|
-
def _resolve_db_path(db_path: Path | None) -> Path:
|
|
507
|
+
def _resolve_db_path(db_path: Path | None) -> Path | None:
|
|
508
|
+
settings = Settings()
|
|
507
509
|
if db_path is None:
|
|
508
|
-
settings
|
|
510
|
+
if getattr(settings, "db_backend", "postgres") != "sqlite":
|
|
511
|
+
return None
|
|
509
512
|
db_path = Path(settings.evalvault_db_path)
|
|
510
513
|
resolved = db_path.expanduser().resolve()
|
|
511
514
|
_ensure_allowed_path(resolved)
|
|
@@ -547,7 +550,7 @@ def _run_auto_analysis(
|
|
|
547
550
|
*,
|
|
548
551
|
run_id: str,
|
|
549
552
|
run: Any,
|
|
550
|
-
storage:
|
|
553
|
+
storage: StoragePort,
|
|
551
554
|
llm_adapter: Any,
|
|
552
555
|
analysis_output: Path | None,
|
|
553
556
|
analysis_report: Path | None,
|
|
@@ -152,6 +152,20 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
|
|
|
152
152
|
errors.append(str(exc))
|
|
153
153
|
retriever = None
|
|
154
154
|
|
|
155
|
+
if retriever is None and (backend_hint == "vllm" or embedding_profile == "vllm"):
|
|
156
|
+
try:
|
|
157
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
158
|
+
|
|
159
|
+
adapter = VLLMAdapter(settings)
|
|
160
|
+
retriever = KoreanDenseRetriever(
|
|
161
|
+
model_name=model_name or settings.vllm_embedding_model,
|
|
162
|
+
ollama_adapter=adapter,
|
|
163
|
+
profile=embedding_profile,
|
|
164
|
+
)
|
|
165
|
+
except Exception as exc:
|
|
166
|
+
errors.append(str(exc))
|
|
167
|
+
retriever = None
|
|
168
|
+
|
|
155
169
|
if retriever is None and backend_hint != "ollama":
|
|
156
170
|
try:
|
|
157
171
|
retriever = KoreanDenseRetriever(model_name=model_name)
|
|
@@ -166,7 +180,9 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
|
|
|
166
180
|
batch_size=batch_size if isinstance(batch_size, int) else None,
|
|
167
181
|
)
|
|
168
182
|
meta = {
|
|
169
|
-
"backend": "
|
|
183
|
+
"backend": "vllm"
|
|
184
|
+
if backend_hint == "vllm" or embedding_profile == "vllm"
|
|
185
|
+
else "ollama"
|
|
170
186
|
if retriever.model_name.startswith("qwen3")
|
|
171
187
|
else "sentence-transformers",
|
|
172
188
|
"model": retriever.model_name,
|
|
@@ -77,6 +77,20 @@ class EmbeddingSearcherModule(BaseAnalysisModule):
|
|
|
77
77
|
errors.append(str(exc))
|
|
78
78
|
retriever = None
|
|
79
79
|
|
|
80
|
+
if retriever is None and embedding_profile == "vllm":
|
|
81
|
+
try:
|
|
82
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
83
|
+
|
|
84
|
+
adapter = VLLMAdapter(settings)
|
|
85
|
+
retriever = KoreanDenseRetriever(
|
|
86
|
+
model_name=settings.vllm_embedding_model,
|
|
87
|
+
ollama_adapter=adapter,
|
|
88
|
+
profile=embedding_profile,
|
|
89
|
+
)
|
|
90
|
+
except Exception as exc:
|
|
91
|
+
errors.append(str(exc))
|
|
92
|
+
retriever = None
|
|
93
|
+
|
|
80
94
|
if retriever is None:
|
|
81
95
|
try:
|
|
82
96
|
retriever = KoreanDenseRetriever(model_name=model_name)
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"""Domain Memory adapters for factual, experiential, and working memory layers."""
|
|
2
2
|
|
|
3
|
-
from evalvault.adapters.outbound.domain_memory.
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from evalvault.adapters.outbound.domain_memory.factory import build_domain_memory_adapter
|
|
4
|
+
from evalvault.adapters.outbound.domain_memory.postgres_adapter import PostgresDomainMemoryAdapter
|
|
5
|
+
from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
|
|
6
6
|
|
|
7
|
-
__all__ = [
|
|
7
|
+
__all__ = [
|
|
8
|
+
"SQLiteDomainMemoryAdapter",
|
|
9
|
+
"PostgresDomainMemoryAdapter",
|
|
10
|
+
"build_domain_memory_adapter",
|
|
11
|
+
]
|