evalvault 1.74.0__py3-none-any.whl → 1.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +28 -17
- evalvault/adapters/inbound/api/routers/calibration.py +9 -9
- evalvault/adapters/inbound/api/routers/chat.py +303 -17
- evalvault/adapters/inbound/api/routers/domain.py +10 -5
- evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
- evalvault/adapters/inbound/api/routers/runs.py +23 -4
- evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
- evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
- evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
- evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
- evalvault/adapters/inbound/cli/commands/compare.py +2 -7
- evalvault/adapters/inbound/cli/commands/debug.py +3 -2
- evalvault/adapters/inbound/cli/commands/domain.py +12 -12
- evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
- evalvault/adapters/inbound/cli/commands/gate.py +3 -2
- evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
- evalvault/adapters/inbound/cli/commands/history.py +3 -12
- evalvault/adapters/inbound/cli/commands/method.py +1 -2
- evalvault/adapters/inbound/cli/commands/ops.py +2 -2
- evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
- evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
- evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
- evalvault/adapters/inbound/cli/commands/regress.py +5 -4
- evalvault/adapters/inbound/cli/commands/run.py +42 -31
- evalvault/adapters/inbound/cli/commands/run_helpers.py +24 -15
- evalvault/adapters/inbound/cli/commands/stage.py +6 -25
- evalvault/adapters/inbound/cli/utils/options.py +10 -4
- evalvault/adapters/inbound/mcp/tools.py +11 -8
- evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
- evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
- evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
- evalvault/adapters/outbound/domain_memory/factory.py +68 -0
- evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
- evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
- evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
- evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
- evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
- evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
- evalvault/adapters/outbound/storage/base_sql.py +3 -2
- evalvault/adapters/outbound/storage/factory.py +53 -0
- evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
- evalvault/config/settings.py +31 -7
- evalvault/domain/services/domain_learning_hook.py +2 -1
- evalvault/ports/inbound/web_port.py +3 -1
- evalvault/ports/outbound/storage_port.py +2 -0
- evalvault-1.75.0.dist-info/METADATA +221 -0
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/RECORD +50 -45
- evalvault-1.74.0.dist-info/METADATA +0 -585
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/WHEEL +0 -0
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -21,7 +21,7 @@ from evalvault.adapters.outbound.dataset import get_loader, load_multiturn_datas
|
|
|
21
21
|
from evalvault.adapters.outbound.documents.versioned_loader import (
|
|
22
22
|
load_versioned_chunks_from_pdf_dir,
|
|
23
23
|
)
|
|
24
|
-
from evalvault.adapters.outbound.domain_memory
|
|
24
|
+
from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
|
|
25
25
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
26
26
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
27
27
|
from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
@@ -30,7 +30,7 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
|
30
30
|
PhoenixSyncService,
|
|
31
31
|
build_experiment_metadata,
|
|
32
32
|
)
|
|
33
|
-
from evalvault.adapters.outbound.storage.
|
|
33
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
34
34
|
from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
|
|
35
35
|
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
36
36
|
from evalvault.config.settings import Settings, apply_profile
|
|
@@ -57,6 +57,7 @@ from evalvault.domain.services.ragas_prompt_overrides import (
|
|
|
57
57
|
)
|
|
58
58
|
from evalvault.domain.services.retriever_context import apply_versioned_retriever_to_dataset
|
|
59
59
|
from evalvault.domain.services.stage_event_builder import StageEventBuilder
|
|
60
|
+
from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort
|
|
60
61
|
from evalvault.ports.outbound.korean_nlp_port import RetrieverPort
|
|
61
62
|
|
|
62
63
|
from ..utils.analysis_io import (
|
|
@@ -111,7 +112,7 @@ def _build_dense_retriever(
|
|
|
111
112
|
settings: Settings,
|
|
112
113
|
profile_name: str | None,
|
|
113
114
|
) -> Any:
|
|
114
|
-
"""Build and index a dense retriever, preferring
|
|
115
|
+
"""Build and index a dense retriever, preferring OpenAI-compatible embeddings when available."""
|
|
115
116
|
|
|
116
117
|
from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
|
|
117
118
|
|
|
@@ -135,6 +136,17 @@ def _build_dense_retriever(
|
|
|
135
136
|
dense_retriever.index(documents)
|
|
136
137
|
return dense_retriever
|
|
137
138
|
|
|
139
|
+
if settings.llm_provider == "vllm":
|
|
140
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
141
|
+
|
|
142
|
+
adapter = VLLMAdapter(settings)
|
|
143
|
+
dense_retriever = KoreanDenseRetriever(
|
|
144
|
+
model_name=settings.vllm_embedding_model,
|
|
145
|
+
ollama_adapter=adapter,
|
|
146
|
+
)
|
|
147
|
+
dense_retriever.index(documents)
|
|
148
|
+
return dense_retriever
|
|
149
|
+
|
|
138
150
|
try:
|
|
139
151
|
dense_retriever = KoreanDenseRetriever()
|
|
140
152
|
dense_retriever.index(documents)
|
|
@@ -142,7 +154,8 @@ def _build_dense_retriever(
|
|
|
142
154
|
except Exception as exc:
|
|
143
155
|
raise RuntimeError(
|
|
144
156
|
"Dense retriever initialization failed. "
|
|
145
|
-
"Use --profile dev/prod (Ollama embedding),
|
|
157
|
+
"Use --profile dev/prod (Ollama embedding), --profile vllm (vLLM embedding), "
|
|
158
|
+
"or install/prepare a local embedding model."
|
|
146
159
|
) from exc
|
|
147
160
|
|
|
148
161
|
|
|
@@ -1729,7 +1742,7 @@ def register_run_commands(
|
|
|
1729
1742
|
|
|
1730
1743
|
assert llm_adapter is not None
|
|
1731
1744
|
|
|
1732
|
-
memory_adapter:
|
|
1745
|
+
memory_adapter: DomainMemoryPort | None = None
|
|
1733
1746
|
memory_evaluator: MemoryAwareEvaluator | None = None
|
|
1734
1747
|
memory_domain_name = memory_domain or ds.metadata.get("domain") or "default"
|
|
1735
1748
|
memory_required = domain_memory_requested
|
|
@@ -1751,8 +1764,15 @@ def register_run_commands(
|
|
|
1751
1764
|
f"Domain Memory 초기화 시작 (domain={memory_domain_name}, lang={memory_language})",
|
|
1752
1765
|
)
|
|
1753
1766
|
try:
|
|
1754
|
-
|
|
1755
|
-
|
|
1767
|
+
if memory_db:
|
|
1768
|
+
memory_db_path = memory_db
|
|
1769
|
+
elif settings.db_backend == "sqlite":
|
|
1770
|
+
memory_db_path = settings.evalvault_memory_db_path
|
|
1771
|
+
else:
|
|
1772
|
+
memory_db_path = None
|
|
1773
|
+
memory_adapter = build_domain_memory_adapter(
|
|
1774
|
+
settings=settings, db_path=Path(memory_db_path) if memory_db_path else None
|
|
1775
|
+
)
|
|
1756
1776
|
memory_evaluator = MemoryAwareEvaluator(
|
|
1757
1777
|
evaluator=evaluator,
|
|
1758
1778
|
memory_port=memory_adapter,
|
|
@@ -2161,16 +2181,9 @@ def register_run_commands(
|
|
|
2161
2181
|
stored = _write_stage_events_jsonl(stage_events, stage_event_payload)
|
|
2162
2182
|
console.print(f"[green]Saved {stored} stage event(s).[/green]")
|
|
2163
2183
|
if stage_store:
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
2168
|
-
else:
|
|
2169
|
-
print_cli_warning(
|
|
2170
|
-
console,
|
|
2171
|
-
"Stage 이벤트를 저장하려면 --db 경로가 필요합니다.",
|
|
2172
|
-
tips=["--db <sqlite_path> 옵션을 함께 지정하세요."],
|
|
2173
|
-
)
|
|
2184
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2185
|
+
stored = storage.save_stage_events(stage_event_payload)
|
|
2186
|
+
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
2174
2187
|
|
|
2175
2188
|
if effective_tracker != "none":
|
|
2176
2189
|
phoenix_opts = None
|
|
@@ -2194,23 +2207,21 @@ def register_run_commands(
|
|
|
2194
2207
|
log_phoenix_traces_fn=log_phoenix_traces,
|
|
2195
2208
|
)
|
|
2196
2209
|
_log_duration(console, verbose, "Tracker 로깅 완료", tracker_started_at)
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
)
|
|
2208
|
-
_log_duration(console, verbose, "DB 저장 완료", db_started_at)
|
|
2210
|
+
db_started_at = datetime.now()
|
|
2211
|
+
_log_timestamp(console, verbose, "DB 저장 시작")
|
|
2212
|
+
_save_to_db(
|
|
2213
|
+
db_path,
|
|
2214
|
+
result,
|
|
2215
|
+
console,
|
|
2216
|
+
prompt_bundle=prompt_bundle,
|
|
2217
|
+
export_excel=excel_output is None,
|
|
2218
|
+
)
|
|
2219
|
+
_log_duration(console, verbose, "DB 저장 완료", db_started_at)
|
|
2209
2220
|
if excel_output:
|
|
2210
2221
|
excel_started_at = datetime.now()
|
|
2211
2222
|
_log_timestamp(console, verbose, f"엑셀 저장 시작 ({excel_output})")
|
|
2212
2223
|
try:
|
|
2213
|
-
storage =
|
|
2224
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2214
2225
|
storage.export_run_to_excel(result.run_id, excel_output)
|
|
2215
2226
|
console.print(f"[green]Excel export saved: {excel_output}[/green]")
|
|
2216
2227
|
except Exception as exc:
|
|
@@ -2242,7 +2253,7 @@ def register_run_commands(
|
|
|
2242
2253
|
prefix=analysis_prefix,
|
|
2243
2254
|
)
|
|
2244
2255
|
console.print("\n[bold]자동 분석 실행[/bold]")
|
|
2245
|
-
storage =
|
|
2256
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2246
2257
|
pipeline_service = build_analysis_pipeline_service(
|
|
2247
2258
|
storage=storage,
|
|
2248
2259
|
llm_adapter=llm_adapter,
|
|
@@ -18,7 +18,8 @@ from rich.table import Table
|
|
|
18
18
|
from evalvault.adapters.outbound.dataset import StreamingConfig, StreamingDatasetLoader
|
|
19
19
|
from evalvault.adapters.outbound.dataset.thresholds import extract_thresholds_from_rows
|
|
20
20
|
from evalvault.adapters.outbound.kg.networkx_adapter import NetworkXKnowledgeGraph
|
|
21
|
-
from evalvault.adapters.outbound.storage.
|
|
21
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
22
|
+
from evalvault.adapters.outbound.storage.postgres_adapter import PostgreSQLStorageAdapter
|
|
22
23
|
from evalvault.config.phoenix_support import (
|
|
23
24
|
get_phoenix_trace_url,
|
|
24
25
|
instrumentation_span,
|
|
@@ -454,18 +455,21 @@ def _log_to_tracker(
|
|
|
454
455
|
|
|
455
456
|
|
|
456
457
|
def _save_to_db(
|
|
457
|
-
db_path: Path,
|
|
458
|
+
db_path: Path | None,
|
|
458
459
|
result,
|
|
459
460
|
console: Console,
|
|
460
461
|
*,
|
|
461
|
-
storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
|
|
462
462
|
prompt_bundle: PromptSetBundle | None = None,
|
|
463
463
|
export_excel: bool = True,
|
|
464
464
|
) -> None:
|
|
465
|
-
"""Persist evaluation run (and optional prompt set) to
|
|
466
|
-
|
|
465
|
+
"""Persist evaluation run (and optional prompt set) to database."""
|
|
466
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
467
|
+
storage_label = (
|
|
468
|
+
"PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
|
|
469
|
+
)
|
|
470
|
+
export_base = db_path.parent if db_path else Path("data/exports")
|
|
471
|
+
with console.status(f"[bold green]Saving to database {storage_label}..."):
|
|
467
472
|
try:
|
|
468
|
-
storage = storage_cls(db_path=db_path)
|
|
469
473
|
if prompt_bundle:
|
|
470
474
|
storage.save_prompt_set(prompt_bundle)
|
|
471
475
|
storage.save_run(result)
|
|
@@ -475,7 +479,8 @@ def _save_to_db(
|
|
|
475
479
|
prompt_bundle.prompt_set.prompt_set_id,
|
|
476
480
|
)
|
|
477
481
|
if export_excel:
|
|
478
|
-
|
|
482
|
+
export_base.mkdir(parents=True, exist_ok=True)
|
|
483
|
+
excel_path = export_base / f"evalvault_run_{result.run_id}.xlsx"
|
|
479
484
|
try:
|
|
480
485
|
storage.export_run_to_excel(result.run_id, excel_path)
|
|
481
486
|
console.print(f"[green]Excel export saved: {excel_path}[/green]")
|
|
@@ -485,7 +490,7 @@ def _save_to_db(
|
|
|
485
490
|
"엑셀 내보내기에 실패했습니다.",
|
|
486
491
|
tips=[str(exc)],
|
|
487
492
|
)
|
|
488
|
-
console.print(f"[green]Results saved to database: {
|
|
493
|
+
console.print(f"[green]Results saved to database: {storage_label}[/green]")
|
|
489
494
|
console.print(f"[dim]Run ID: {result.run_id}[/dim]")
|
|
490
495
|
if prompt_bundle:
|
|
491
496
|
console.print(
|
|
@@ -502,21 +507,24 @@ def _save_to_db(
|
|
|
502
507
|
|
|
503
508
|
|
|
504
509
|
def _save_multiturn_to_db(
|
|
505
|
-
db_path: Path,
|
|
510
|
+
db_path: Path | None,
|
|
506
511
|
run_record: MultiTurnRunRecord,
|
|
507
512
|
conversations: list[MultiTurnConversationRecord],
|
|
508
513
|
turn_results: list[MultiTurnTurnResult],
|
|
509
514
|
console: Console,
|
|
510
515
|
*,
|
|
511
|
-
storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
|
|
512
516
|
export_excel: bool = True,
|
|
513
517
|
excel_output_path: Path | None = None,
|
|
514
518
|
metric_thresholds: dict[str, float] | None = None,
|
|
515
519
|
) -> None:
|
|
516
|
-
"""Persist multiturn evaluation run to
|
|
517
|
-
|
|
520
|
+
"""Persist multiturn evaluation run to database."""
|
|
521
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
522
|
+
storage_label = (
|
|
523
|
+
"PostgreSQL" if isinstance(storage, PostgreSQLStorageAdapter) else f"SQLite ({db_path})"
|
|
524
|
+
)
|
|
525
|
+
export_base = db_path.parent if db_path else Path("data/exports")
|
|
526
|
+
with console.status(f"[bold green]Saving multiturn run to {storage_label}..."):
|
|
518
527
|
try:
|
|
519
|
-
storage = storage_cls(db_path=db_path)
|
|
520
528
|
storage.save_multiturn_run(
|
|
521
529
|
run_record,
|
|
522
530
|
conversations,
|
|
@@ -524,8 +532,9 @@ def _save_multiturn_to_db(
|
|
|
524
532
|
metric_thresholds=metric_thresholds,
|
|
525
533
|
)
|
|
526
534
|
if export_excel:
|
|
535
|
+
export_base.mkdir(parents=True, exist_ok=True)
|
|
527
536
|
excel_path = excel_output_path or (
|
|
528
|
-
|
|
537
|
+
export_base / f"evalvault_multiturn_{run_record.run_id}.xlsx"
|
|
529
538
|
)
|
|
530
539
|
try:
|
|
531
540
|
storage.export_multiturn_run_to_excel(run_record.run_id, excel_path)
|
|
@@ -536,7 +545,7 @@ def _save_multiturn_to_db(
|
|
|
536
545
|
"멀티턴 엑셀 내보내기에 실패했습니다.",
|
|
537
546
|
tips=[str(exc)],
|
|
538
547
|
)
|
|
539
|
-
console.print(f"[green]Multiturn results saved to database: {
|
|
548
|
+
console.print(f"[green]Multiturn results saved to database: {storage_label}[/green]")
|
|
540
549
|
console.print(f"[dim]Run ID: {run_record.run_id}[/dim]")
|
|
541
550
|
except Exception as exc: # pragma: no cover - persistence errors
|
|
542
551
|
print_cli_error(
|
|
@@ -16,7 +16,7 @@ from rich.table import Table
|
|
|
16
16
|
from evalvault.adapters.outbound.improvement.stage_metric_playbook_loader import (
|
|
17
17
|
StageMetricPlaybookLoader,
|
|
18
18
|
)
|
|
19
|
-
from evalvault.adapters.outbound.storage.
|
|
19
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
20
20
|
from evalvault.config.settings import Settings
|
|
21
21
|
from evalvault.domain.entities.stage import REQUIRED_STAGE_TYPES, StageEvent, StageMetric
|
|
22
22
|
from evalvault.domain.services.stage_metric_guide_service import StageMetricGuideService
|
|
@@ -28,13 +28,6 @@ from ..utils.options import db_option
|
|
|
28
28
|
logger = logging.getLogger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def _resolve_db_path(db_path: Path | None) -> Path:
|
|
32
|
-
resolved = db_path or Settings().evalvault_db_path
|
|
33
|
-
if resolved is None:
|
|
34
|
-
raise typer.BadParameter("Database path is not configured.")
|
|
35
|
-
return resolved
|
|
36
|
-
|
|
37
|
-
|
|
38
31
|
@dataclass
|
|
39
32
|
class ValidationStats:
|
|
40
33
|
"""Tracks StageEvent validation failures by error type."""
|
|
@@ -122,8 +115,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
122
115
|
console.print("[yellow]No valid stage events found in the input file.[/yellow]")
|
|
123
116
|
raise typer.Exit(1)
|
|
124
117
|
|
|
125
|
-
|
|
126
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
118
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
127
119
|
stored = storage.save_stage_events(events)
|
|
128
120
|
|
|
129
121
|
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
@@ -147,8 +139,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
147
139
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
148
140
|
) -> None:
|
|
149
141
|
"""List stage events for a run."""
|
|
150
|
-
|
|
151
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
142
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
152
143
|
events = storage.list_stage_events(run_id, stage_type=stage_type)
|
|
153
144
|
|
|
154
145
|
if not events:
|
|
@@ -184,8 +175,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
184
175
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
185
176
|
) -> None:
|
|
186
177
|
"""Show summary stats for stage events."""
|
|
187
|
-
|
|
188
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
178
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
189
179
|
events = storage.list_stage_events(run_id)
|
|
190
180
|
if not events:
|
|
191
181
|
console.print("[yellow]No stage events found.[/yellow]")
|
|
@@ -218,8 +208,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
218
208
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
219
209
|
) -> None:
|
|
220
210
|
"""Compute stage metrics from stored events."""
|
|
221
|
-
|
|
222
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
211
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
223
212
|
events = storage.list_stage_events(run_id)
|
|
224
213
|
if not events:
|
|
225
214
|
console.print("[yellow]No stage events found.[/yellow]")
|
|
@@ -276,8 +265,7 @@ def create_stage_app(console: Console) -> typer.Typer:
|
|
|
276
265
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
277
266
|
) -> None:
|
|
278
267
|
"""Report stage summary, metrics, and improvement guides."""
|
|
279
|
-
|
|
280
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
268
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
281
269
|
events = storage.list_stage_events(run_id)
|
|
282
270
|
if not events:
|
|
283
271
|
console.print("[yellow]No stage events found.[/yellow]")
|
|
@@ -547,13 +535,6 @@ def _load_default_profile() -> str | None:
|
|
|
547
535
|
return None
|
|
548
536
|
|
|
549
537
|
|
|
550
|
-
def _resolve_db_path(db_path: Path | None) -> Path:
|
|
551
|
-
resolved = db_path or Settings().evalvault_db_path
|
|
552
|
-
if resolved is None:
|
|
553
|
-
raise typer.BadParameter("Database path is not configured.")
|
|
554
|
-
return resolved
|
|
555
|
-
|
|
556
|
-
|
|
557
538
|
def _print_stage_summary(console: Console, summary_data) -> None:
|
|
558
539
|
table = Table(show_header=True, header_style="bold cyan")
|
|
559
540
|
table.add_column("Stage Type")
|
|
@@ -31,11 +31,11 @@ def profile_option(
|
|
|
31
31
|
def db_option(
|
|
32
32
|
*,
|
|
33
33
|
default: str | Path | None = _UNSET,
|
|
34
|
-
help_text: str = "
|
|
34
|
+
help_text: str = "SQLite DB path (PostgreSQL is default when omitted).",
|
|
35
35
|
) -> Path | None:
|
|
36
36
|
"""Shared --db / -D option definition."""
|
|
37
37
|
|
|
38
|
-
resolved_default =
|
|
38
|
+
resolved_default = None if default is _UNSET else default
|
|
39
39
|
normalized_default = _normalize_path(resolved_default)
|
|
40
40
|
return typer.Option(
|
|
41
41
|
normalized_default,
|
|
@@ -49,11 +49,17 @@ def db_option(
|
|
|
49
49
|
def memory_db_option(
|
|
50
50
|
*,
|
|
51
51
|
default: str | Path | None = _UNSET,
|
|
52
|
-
help_text: str = "
|
|
52
|
+
help_text: str = "Domain Memory SQLite path (Postgres is default when omitted).",
|
|
53
53
|
) -> Path | None:
|
|
54
54
|
"""Shared option factory for the domain memory database path."""
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
if default is _UNSET:
|
|
57
|
+
settings = Settings()
|
|
58
|
+
resolved_default = (
|
|
59
|
+
settings.evalvault_memory_db_path if settings.db_backend == "sqlite" else None
|
|
60
|
+
)
|
|
61
|
+
else:
|
|
62
|
+
resolved_default = default
|
|
57
63
|
normalized_default = _normalize_path(resolved_default)
|
|
58
64
|
return typer.Option(
|
|
59
65
|
normalized_default,
|
|
@@ -20,12 +20,13 @@ from evalvault.adapters.outbound.analysis.pipeline_factory import build_analysis
|
|
|
20
20
|
from evalvault.adapters.outbound.analysis.statistical_adapter import StatisticalAnalysisAdapter
|
|
21
21
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
22
22
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
23
|
-
from evalvault.adapters.outbound.storage.
|
|
23
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
24
24
|
from evalvault.config.settings import Settings, apply_profile
|
|
25
25
|
from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
|
|
26
26
|
from evalvault.domain.services.analysis_service import AnalysisService
|
|
27
27
|
from evalvault.domain.services.evaluator import RagasEvaluator
|
|
28
28
|
from evalvault.ports.inbound.web_port import EvalRequest, RunFilters, RunSummary
|
|
29
|
+
from evalvault.ports.outbound.storage_port import StoragePort
|
|
29
30
|
|
|
30
31
|
from .schemas import (
|
|
31
32
|
AnalyzeCompareRequest,
|
|
@@ -82,7 +83,7 @@ def list_runs(payload: dict[str, Any] | ListRunsRequest) -> ListRunsResponse:
|
|
|
82
83
|
errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)]
|
|
83
84
|
)
|
|
84
85
|
|
|
85
|
-
storage =
|
|
86
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
86
87
|
adapter = WebUIAdapter(storage=storage, settings=Settings())
|
|
87
88
|
|
|
88
89
|
filters = RunFilters(
|
|
@@ -123,7 +124,7 @@ def get_run_summary(payload: dict[str, Any] | GetRunSummaryRequest) -> GetRunSum
|
|
|
123
124
|
errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)]
|
|
124
125
|
)
|
|
125
126
|
|
|
126
|
-
storage =
|
|
127
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
127
128
|
try:
|
|
128
129
|
run = storage.get_run(request.run_id)
|
|
129
130
|
except KeyError as exc:
|
|
@@ -175,7 +176,7 @@ def run_evaluation(payload: dict[str, Any] | RunEvaluationRequest) -> RunEvaluat
|
|
|
175
176
|
errors=[_error("EVAL_LLM_INIT_FAILED", str(exc), stage=ErrorStage.evaluate)],
|
|
176
177
|
)
|
|
177
178
|
|
|
178
|
-
storage =
|
|
179
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
179
180
|
llm_factory = SettingsLLMFactory(settings)
|
|
180
181
|
korean_toolkit = try_create_korean_toolkit()
|
|
181
182
|
evaluator = RagasEvaluator(korean_toolkit=korean_toolkit, llm_factory=llm_factory)
|
|
@@ -266,7 +267,7 @@ def analyze_compare(payload: dict[str, Any] | AnalyzeCompareRequest) -> AnalyzeC
|
|
|
266
267
|
errors=[_error("EVAL_DB_UNSAFE_PATH", str(exc), stage=ErrorStage.storage)],
|
|
267
268
|
)
|
|
268
269
|
|
|
269
|
-
storage =
|
|
270
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
270
271
|
try:
|
|
271
272
|
run_a = storage.get_run(request.run_id_a)
|
|
272
273
|
run_b = storage.get_run(request.run_id_b)
|
|
@@ -503,9 +504,11 @@ def _serialize_run_summary(summary: RunSummary) -> RunSummaryPayload:
|
|
|
503
504
|
return RunSummaryPayload.model_validate(payload)
|
|
504
505
|
|
|
505
506
|
|
|
506
|
-
def _resolve_db_path(db_path: Path | None) -> Path:
|
|
507
|
+
def _resolve_db_path(db_path: Path | None) -> Path | None:
|
|
508
|
+
settings = Settings()
|
|
507
509
|
if db_path is None:
|
|
508
|
-
settings
|
|
510
|
+
if getattr(settings, "db_backend", "postgres") != "sqlite":
|
|
511
|
+
return None
|
|
509
512
|
db_path = Path(settings.evalvault_db_path)
|
|
510
513
|
resolved = db_path.expanduser().resolve()
|
|
511
514
|
_ensure_allowed_path(resolved)
|
|
@@ -547,7 +550,7 @@ def _run_auto_analysis(
|
|
|
547
550
|
*,
|
|
548
551
|
run_id: str,
|
|
549
552
|
run: Any,
|
|
550
|
-
storage:
|
|
553
|
+
storage: StoragePort,
|
|
551
554
|
llm_adapter: Any,
|
|
552
555
|
analysis_output: Path | None,
|
|
553
556
|
analysis_report: Path | None,
|
|
@@ -152,6 +152,20 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
|
|
|
152
152
|
errors.append(str(exc))
|
|
153
153
|
retriever = None
|
|
154
154
|
|
|
155
|
+
if retriever is None and (backend_hint == "vllm" or embedding_profile == "vllm"):
|
|
156
|
+
try:
|
|
157
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
158
|
+
|
|
159
|
+
adapter = VLLMAdapter(settings)
|
|
160
|
+
retriever = KoreanDenseRetriever(
|
|
161
|
+
model_name=model_name or settings.vllm_embedding_model,
|
|
162
|
+
ollama_adapter=adapter,
|
|
163
|
+
profile=embedding_profile,
|
|
164
|
+
)
|
|
165
|
+
except Exception as exc:
|
|
166
|
+
errors.append(str(exc))
|
|
167
|
+
retriever = None
|
|
168
|
+
|
|
155
169
|
if retriever is None and backend_hint != "ollama":
|
|
156
170
|
try:
|
|
157
171
|
retriever = KoreanDenseRetriever(model_name=model_name)
|
|
@@ -166,7 +180,9 @@ class EmbeddingAnalyzerModule(BaseAnalysisModule):
|
|
|
166
180
|
batch_size=batch_size if isinstance(batch_size, int) else None,
|
|
167
181
|
)
|
|
168
182
|
meta = {
|
|
169
|
-
"backend": "
|
|
183
|
+
"backend": "vllm"
|
|
184
|
+
if backend_hint == "vllm" or embedding_profile == "vllm"
|
|
185
|
+
else "ollama"
|
|
170
186
|
if retriever.model_name.startswith("qwen3")
|
|
171
187
|
else "sentence-transformers",
|
|
172
188
|
"model": retriever.model_name,
|
|
@@ -77,6 +77,20 @@ class EmbeddingSearcherModule(BaseAnalysisModule):
|
|
|
77
77
|
errors.append(str(exc))
|
|
78
78
|
retriever = None
|
|
79
79
|
|
|
80
|
+
if retriever is None and embedding_profile == "vllm":
|
|
81
|
+
try:
|
|
82
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
83
|
+
|
|
84
|
+
adapter = VLLMAdapter(settings)
|
|
85
|
+
retriever = KoreanDenseRetriever(
|
|
86
|
+
model_name=settings.vllm_embedding_model,
|
|
87
|
+
ollama_adapter=adapter,
|
|
88
|
+
profile=embedding_profile,
|
|
89
|
+
)
|
|
90
|
+
except Exception as exc:
|
|
91
|
+
errors.append(str(exc))
|
|
92
|
+
retriever = None
|
|
93
|
+
|
|
80
94
|
if retriever is None:
|
|
81
95
|
try:
|
|
82
96
|
retriever = KoreanDenseRetriever(model_name=model_name)
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"""Domain Memory adapters for factual, experiential, and working memory layers."""
|
|
2
2
|
|
|
3
|
-
from evalvault.adapters.outbound.domain_memory.
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from evalvault.adapters.outbound.domain_memory.factory import build_domain_memory_adapter
|
|
4
|
+
from evalvault.adapters.outbound.domain_memory.postgres_adapter import PostgresDomainMemoryAdapter
|
|
5
|
+
from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
|
|
6
6
|
|
|
7
|
-
__all__ = [
|
|
7
|
+
__all__ = [
|
|
8
|
+
"SQLiteDomainMemoryAdapter",
|
|
9
|
+
"PostgresDomainMemoryAdapter",
|
|
10
|
+
"build_domain_memory_adapter",
|
|
11
|
+
]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from evalvault.adapters.outbound.domain_memory.postgres_adapter import PostgresDomainMemoryAdapter
|
|
8
|
+
from evalvault.adapters.outbound.domain_memory.sqlite_adapter import SQLiteDomainMemoryAdapter
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from evalvault.config.settings import Settings
|
|
12
|
+
from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def build_domain_memory_adapter(
|
|
18
|
+
*,
|
|
19
|
+
settings: Settings | None = None,
|
|
20
|
+
db_path: Path | None = None,
|
|
21
|
+
fallback_to_sqlite: bool = True,
|
|
22
|
+
) -> DomainMemoryPort:
|
|
23
|
+
"""Build domain memory adapter based on settings and parameters.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
settings: Application settings (uses default if None)
|
|
27
|
+
db_path: Explicit SQLite database path (forces SQLite if provided)
|
|
28
|
+
fallback_to_sqlite: Fall back to SQLite if Postgres fails
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
DomainMemoryPort implementation (Postgres by default, SQLite if specified)
|
|
32
|
+
"""
|
|
33
|
+
from evalvault.config.settings import Settings
|
|
34
|
+
|
|
35
|
+
resolved_settings = settings or Settings()
|
|
36
|
+
|
|
37
|
+
if db_path is not None:
|
|
38
|
+
return SQLiteDomainMemoryAdapter(db_path=db_path)
|
|
39
|
+
|
|
40
|
+
backend = getattr(resolved_settings, "db_backend", "postgres")
|
|
41
|
+
if backend == "sqlite":
|
|
42
|
+
resolved_db_path = resolved_settings.evalvault_memory_db_path
|
|
43
|
+
if resolved_db_path is None:
|
|
44
|
+
raise RuntimeError("SQLite backend selected but evalvault_memory_db_path is not set.")
|
|
45
|
+
return SQLiteDomainMemoryAdapter(db_path=resolved_db_path)
|
|
46
|
+
|
|
47
|
+
conn_string = resolved_settings.postgres_connection_string
|
|
48
|
+
if not conn_string:
|
|
49
|
+
host = resolved_settings.postgres_host or "localhost"
|
|
50
|
+
port = resolved_settings.postgres_port
|
|
51
|
+
database = resolved_settings.postgres_database
|
|
52
|
+
user = resolved_settings.postgres_user or "postgres"
|
|
53
|
+
password = resolved_settings.postgres_password or ""
|
|
54
|
+
conn_string = f"host={host} port={port} dbname={database} user={user} password={password}"
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
return PostgresDomainMemoryAdapter(connection_string=conn_string)
|
|
58
|
+
except Exception as exc:
|
|
59
|
+
if not fallback_to_sqlite:
|
|
60
|
+
raise
|
|
61
|
+
logger.warning("PostgreSQL domain memory adapter failed (%s). Falling back to SQLite.", exc)
|
|
62
|
+
resolved_db_path = resolved_settings.evalvault_memory_db_path
|
|
63
|
+
if resolved_db_path is None:
|
|
64
|
+
raise
|
|
65
|
+
return SQLiteDomainMemoryAdapter(db_path=resolved_db_path)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
__all__ = ["build_domain_memory_adapter"]
|