evalvault 1.74.0__py3-none-any.whl → 1.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +28 -17
- evalvault/adapters/inbound/api/routers/calibration.py +9 -9
- evalvault/adapters/inbound/api/routers/chat.py +303 -17
- evalvault/adapters/inbound/api/routers/domain.py +10 -5
- evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
- evalvault/adapters/inbound/api/routers/runs.py +23 -4
- evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
- evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
- evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
- evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
- evalvault/adapters/inbound/cli/commands/compare.py +2 -7
- evalvault/adapters/inbound/cli/commands/debug.py +3 -2
- evalvault/adapters/inbound/cli/commands/domain.py +12 -12
- evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
- evalvault/adapters/inbound/cli/commands/gate.py +3 -2
- evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
- evalvault/adapters/inbound/cli/commands/history.py +3 -12
- evalvault/adapters/inbound/cli/commands/method.py +1 -2
- evalvault/adapters/inbound/cli/commands/ops.py +2 -2
- evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
- evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
- evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
- evalvault/adapters/inbound/cli/commands/regress.py +5 -4
- evalvault/adapters/inbound/cli/commands/run.py +42 -31
- evalvault/adapters/inbound/cli/commands/run_helpers.py +24 -15
- evalvault/adapters/inbound/cli/commands/stage.py +6 -25
- evalvault/adapters/inbound/cli/utils/options.py +10 -4
- evalvault/adapters/inbound/mcp/tools.py +11 -8
- evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
- evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
- evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
- evalvault/adapters/outbound/domain_memory/factory.py +68 -0
- evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
- evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
- evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
- evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
- evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
- evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
- evalvault/adapters/outbound/storage/base_sql.py +3 -2
- evalvault/adapters/outbound/storage/factory.py +53 -0
- evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
- evalvault/config/settings.py +31 -7
- evalvault/domain/services/domain_learning_hook.py +2 -1
- evalvault/ports/inbound/web_port.py +3 -1
- evalvault/ports/outbound/storage_port.py +2 -0
- evalvault-1.75.0.dist-info/METADATA +221 -0
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/RECORD +50 -45
- evalvault-1.74.0.dist-info/METADATA +0 -585
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/WHEEL +0 -0
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.74.0.dist-info → evalvault-1.75.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -26,7 +26,8 @@ from evalvault.adapters.outbound.analysis.pipeline_helpers import to_serializabl
|
|
|
26
26
|
from evalvault.adapters.outbound.cache import MemoryCacheAdapter
|
|
27
27
|
from evalvault.adapters.outbound.llm import get_llm_adapter
|
|
28
28
|
from evalvault.adapters.outbound.report import DashboardGenerator, MarkdownReportAdapter
|
|
29
|
-
from evalvault.adapters.outbound.storage.
|
|
29
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
30
|
+
from evalvault.adapters.outbound.storage.postgres_adapter import PostgreSQLStorageAdapter
|
|
30
31
|
from evalvault.config.phoenix_support import get_phoenix_trace_url
|
|
31
32
|
from evalvault.config.settings import Settings, apply_profile
|
|
32
33
|
from evalvault.domain.entities import EvaluationRun
|
|
@@ -115,11 +116,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
|
|
|
115
116
|
) -> None:
|
|
116
117
|
"""평가 실행 결과를 분석하고 통계 인사이트를 표시합니다."""
|
|
117
118
|
|
|
118
|
-
|
|
119
|
-
if resolved_db_path is None:
|
|
120
|
-
_console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
|
|
121
|
-
raise typer.Exit(1)
|
|
122
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
119
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
123
120
|
|
|
124
121
|
try:
|
|
125
122
|
run = storage.get_run(run_id)
|
|
@@ -217,7 +214,12 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
|
|
|
217
214
|
_save_analysis_payload(bundle.causal, "causal")
|
|
218
215
|
if improvement_report is not None:
|
|
219
216
|
_save_analysis_payload(improvement_report, "playbook")
|
|
220
|
-
|
|
217
|
+
storage_label = (
|
|
218
|
+
"PostgreSQL"
|
|
219
|
+
if isinstance(storage, PostgreSQLStorageAdapter)
|
|
220
|
+
else f"SQLite ({db_path})"
|
|
221
|
+
)
|
|
222
|
+
_console.print(f"\n[green]분석 결과 DB 저장: {storage_label}[/green]")
|
|
221
223
|
|
|
222
224
|
if dashboard:
|
|
223
225
|
dashboard_gen = DashboardGenerator()
|
|
@@ -359,11 +361,7 @@ def register_analyze_commands(app: typer.Typer, console: Console) -> None:
|
|
|
359
361
|
) -> None:
|
|
360
362
|
"""두 실행을 통계적으로 비교합니다."""
|
|
361
363
|
|
|
362
|
-
|
|
363
|
-
if resolved_db_path is None:
|
|
364
|
-
_console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
|
|
365
|
-
raise typer.Exit(1)
|
|
366
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
364
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
367
365
|
|
|
368
366
|
try:
|
|
369
367
|
run_a = storage.get_run(run_id1)
|
|
@@ -385,7 +385,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
385
385
|
"""
|
|
386
386
|
try:
|
|
387
387
|
from evalvault.adapters.outbound.benchmark import LMEvalAdapter
|
|
388
|
-
from evalvault.adapters.outbound.storage import
|
|
388
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
389
389
|
from evalvault.config.settings import get_settings
|
|
390
390
|
from evalvault.domain.services.benchmark_service import BenchmarkService
|
|
391
391
|
from evalvault.ports.outbound.benchmark_port import BenchmarkBackend
|
|
@@ -426,7 +426,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
426
426
|
ensure_phoenix_instrumentation(settings, console=console, force=True)
|
|
427
427
|
|
|
428
428
|
benchmark_adapter = LMEvalAdapter(settings=settings)
|
|
429
|
-
storage_adapter =
|
|
429
|
+
storage_adapter = build_storage_adapter(settings=settings, db_path=db)
|
|
430
430
|
tracer_adapter = _create_tracer_adapter(phoenix)
|
|
431
431
|
service = BenchmarkService(
|
|
432
432
|
benchmark_adapter=benchmark_adapter,
|
|
@@ -556,9 +556,11 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
556
556
|
),
|
|
557
557
|
) -> None:
|
|
558
558
|
"""View past benchmark runs."""
|
|
559
|
-
from evalvault.adapters.outbound.storage import
|
|
559
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
560
|
+
from evalvault.config.settings import get_settings
|
|
560
561
|
|
|
561
|
-
|
|
562
|
+
settings = get_settings()
|
|
563
|
+
storage = build_storage_adapter(settings=settings, db_path=db)
|
|
562
564
|
runs = storage.list_benchmark_runs(
|
|
563
565
|
benchmark_type=benchmark_type,
|
|
564
566
|
model_name=model_name,
|
|
@@ -629,7 +631,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
629
631
|
evalvault benchmark report abc123
|
|
630
632
|
evalvault benchmark report abc123 -o report.md -p dev
|
|
631
633
|
"""
|
|
632
|
-
from evalvault.adapters.outbound.storage import
|
|
634
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
633
635
|
from evalvault.config.settings import get_settings
|
|
634
636
|
from evalvault.domain.services.benchmark_report_service import (
|
|
635
637
|
BenchmarkReportService,
|
|
@@ -639,7 +641,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
639
641
|
if profile:
|
|
640
642
|
settings.profile = profile
|
|
641
643
|
|
|
642
|
-
storage =
|
|
644
|
+
storage = build_storage_adapter(settings=settings, db_path=db)
|
|
643
645
|
benchmark_run = storage.get_benchmark_run(run_id)
|
|
644
646
|
|
|
645
647
|
if not benchmark_run:
|
|
@@ -717,7 +719,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
717
719
|
evalvault benchmark compare abc123 def456
|
|
718
720
|
evalvault benchmark compare abc123 def456 -o comparison.md
|
|
719
721
|
"""
|
|
720
|
-
from evalvault.adapters.outbound.storage import
|
|
722
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
721
723
|
from evalvault.config.settings import get_settings
|
|
722
724
|
from evalvault.domain.services.benchmark_report_service import (
|
|
723
725
|
BenchmarkReportService,
|
|
@@ -727,7 +729,7 @@ def create_benchmark_app(console: Console) -> typer.Typer:
|
|
|
727
729
|
if profile:
|
|
728
730
|
settings.profile = profile
|
|
729
731
|
|
|
730
|
-
storage =
|
|
732
|
+
storage = build_storage_adapter(settings=settings, db_path=db)
|
|
731
733
|
baseline = storage.get_benchmark_run(baseline_id)
|
|
732
734
|
target = storage.get_benchmark_run(target_id)
|
|
733
735
|
|
|
@@ -7,7 +7,7 @@ import typer
|
|
|
7
7
|
from rich.console import Console
|
|
8
8
|
from rich.table import Table
|
|
9
9
|
|
|
10
|
-
from evalvault.adapters.outbound.storage.
|
|
10
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
11
11
|
from evalvault.config.settings import Settings
|
|
12
12
|
from evalvault.domain.services.satisfaction_calibration_service import (
|
|
13
13
|
SatisfactionCalibrationService,
|
|
@@ -36,12 +36,7 @@ def register_calibrate_commands(app: typer.Typer, console: Console) -> None:
|
|
|
36
36
|
),
|
|
37
37
|
db_path: Path | None = db_option(help_text="DB 경로"),
|
|
38
38
|
) -> None:
|
|
39
|
-
|
|
40
|
-
if resolved_db_path is None:
|
|
41
|
-
_console.print("[red]오류: DB 경로가 설정되지 않았습니다.[/red]")
|
|
42
|
-
raise typer.Exit(1)
|
|
43
|
-
|
|
44
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
39
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
45
40
|
try:
|
|
46
41
|
run = storage.get_run(run_id)
|
|
47
42
|
except KeyError:
|
|
@@ -12,7 +12,7 @@ from evalvault.adapters.inbound.cli.utils.console import print_cli_error, progre
|
|
|
12
12
|
from evalvault.adapters.inbound.cli.utils.options import db_option
|
|
13
13
|
from evalvault.adapters.inbound.cli.utils.validators import parse_csv_option, validate_choice
|
|
14
14
|
from evalvault.adapters.outbound.judge_calibration_reporter import JudgeCalibrationReporter
|
|
15
|
-
from evalvault.adapters.outbound.storage.
|
|
15
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
16
16
|
from evalvault.config.settings import Settings
|
|
17
17
|
from evalvault.domain.services.judge_calibration_service import JudgeCalibrationService
|
|
18
18
|
|
|
@@ -75,11 +75,6 @@ def register_calibrate_judge_commands(app: typer.Typer, console: Console) -> Non
|
|
|
75
75
|
concurrency: int = typer.Option(8, "--concurrency", help="동시성 수준"),
|
|
76
76
|
db_path: Path | None = db_option(help_text="DB 경로"),
|
|
77
77
|
) -> None:
|
|
78
|
-
resolved_db_path = db_path or Settings().evalvault_db_path
|
|
79
|
-
if resolved_db_path is None:
|
|
80
|
-
print_cli_error(_console, "DB 경로가 설정되지 않았습니다.")
|
|
81
|
-
raise typer.Exit(1)
|
|
82
|
-
|
|
83
78
|
labels_source = labels_source.strip().lower()
|
|
84
79
|
method = method.strip().lower()
|
|
85
80
|
validate_choice(labels_source, _ALLOWED_LABELS, _console, value_label="labels-source")
|
|
@@ -96,7 +91,7 @@ def register_calibrate_judge_commands(app: typer.Typer, console: Console) -> Non
|
|
|
96
91
|
print_cli_error(_console, "--concurrency 값은 1 이상이어야 합니다.")
|
|
97
92
|
raise typer.Exit(1)
|
|
98
93
|
|
|
99
|
-
storage =
|
|
94
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
100
95
|
try:
|
|
101
96
|
run = storage.get_run(run_id)
|
|
102
97
|
except KeyError:
|
|
@@ -12,7 +12,7 @@ from evalvault.adapters.outbound.analysis.comparison_pipeline_adapter import (
|
|
|
12
12
|
)
|
|
13
13
|
from evalvault.adapters.outbound.analysis.pipeline_factory import build_analysis_pipeline_service
|
|
14
14
|
from evalvault.adapters.outbound.analysis.statistical_adapter import StatisticalAnalysisAdapter
|
|
15
|
-
from evalvault.adapters.outbound.storage.
|
|
15
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
16
16
|
from evalvault.config.settings import Settings, apply_profile
|
|
17
17
|
from evalvault.domain.services.run_comparison_service import (
|
|
18
18
|
RunComparisonError,
|
|
@@ -91,15 +91,10 @@ def register_compare_commands(app: typer.Typer, console: Console) -> None:
|
|
|
91
91
|
validate_choice(test, ["t-test", "mann-whitney"], console, value_label="test")
|
|
92
92
|
validate_choice(output_format, ["table", "json"], console, value_label="format")
|
|
93
93
|
|
|
94
|
-
resolved_db_path = db_path or Settings().evalvault_db_path
|
|
95
|
-
if resolved_db_path is None:
|
|
96
|
-
print_cli_error(console, "DB 경로가 설정되지 않았습니다.")
|
|
97
|
-
raise typer.Exit(1)
|
|
98
|
-
|
|
99
94
|
metric_list = parse_csv_option(metrics)
|
|
100
95
|
metric_list = metric_list or None
|
|
101
96
|
|
|
102
|
-
storage =
|
|
97
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
103
98
|
analysis_adapter = StatisticalAnalysisAdapter()
|
|
104
99
|
|
|
105
100
|
settings = Settings()
|
|
@@ -9,7 +9,8 @@ from rich.console import Console
|
|
|
9
9
|
from rich.markdown import Markdown
|
|
10
10
|
|
|
11
11
|
from evalvault.adapters.outbound.debug.report_renderer import render_json, render_markdown
|
|
12
|
-
from evalvault.adapters.outbound.storage.
|
|
12
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
13
|
+
from evalvault.config.settings import Settings
|
|
13
14
|
from evalvault.domain.services.debug_report_service import DebugReportService
|
|
14
15
|
|
|
15
16
|
from ..utils.options import db_option
|
|
@@ -42,7 +43,7 @@ def create_debug_app(console: Console) -> typer.Typer:
|
|
|
42
43
|
|
|
43
44
|
validate_choice(format, ["markdown", "json"], console, value_label="format")
|
|
44
45
|
|
|
45
|
-
storage =
|
|
46
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
46
47
|
service = DebugReportService()
|
|
47
48
|
|
|
48
49
|
try:
|
|
@@ -11,9 +11,7 @@ import typer
|
|
|
11
11
|
from rich.console import Console
|
|
12
12
|
from rich.table import Table
|
|
13
13
|
|
|
14
|
-
from evalvault.adapters.outbound.domain_memory
|
|
15
|
-
SQLiteDomainMemoryAdapter,
|
|
16
|
-
)
|
|
14
|
+
from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
|
|
17
15
|
from evalvault.config.domain_config import (
|
|
18
16
|
generate_domain_template,
|
|
19
17
|
list_domains,
|
|
@@ -23,6 +21,7 @@ from evalvault.config.domain_config import (
|
|
|
23
21
|
from evalvault.domain.entities.memory import FactType
|
|
24
22
|
from evalvault.domain.services.domain_learning_hook import DomainLearningHook
|
|
25
23
|
from evalvault.domain.services.embedding_overlay import build_cluster_facts
|
|
24
|
+
from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort
|
|
26
25
|
|
|
27
26
|
from ..utils.options import memory_db_option
|
|
28
27
|
from ..utils.validators import parse_csv_option, validate_choices
|
|
@@ -117,8 +116,8 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
117
116
|
memory_app = typer.Typer(name="memory", help="Domain memory utilities.")
|
|
118
117
|
domain_app.add_typer(memory_app, name="memory")
|
|
119
118
|
|
|
120
|
-
def _load_memory_adapter(db_path: Path) ->
|
|
121
|
-
return
|
|
119
|
+
def _load_memory_adapter(db_path: Path | None) -> DomainMemoryPort:
|
|
120
|
+
return build_domain_memory_adapter(db_path=db_path)
|
|
122
121
|
|
|
123
122
|
def _truncate(text: str, max_length: int = 40) -> str:
|
|
124
123
|
if len(text) <= max_length:
|
|
@@ -160,7 +159,7 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
160
159
|
"-d",
|
|
161
160
|
help="Filter by domain (leave empty for global stats).",
|
|
162
161
|
),
|
|
163
|
-
memory_db: Path = memory_db_option(),
|
|
162
|
+
memory_db: Path | None = memory_db_option(),
|
|
164
163
|
) -> None:
|
|
165
164
|
"""Show aggregated domain memory statistics."""
|
|
166
165
|
|
|
@@ -175,8 +174,9 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
175
174
|
table.add_row("Behaviors", str(stats.get("behaviors", 0)))
|
|
176
175
|
table.add_row("Contexts", str(stats.get("contexts", 0)))
|
|
177
176
|
console.print(table)
|
|
177
|
+
database_label = "postgres (default)" if memory_db is None else str(memory_db)
|
|
178
178
|
console.print(
|
|
179
|
-
f"[dim]Database:[/dim] {
|
|
179
|
+
f"[dim]Database:[/dim] {database_label} | [dim]Domain:[/dim] {domain or 'all'}\n"
|
|
180
180
|
)
|
|
181
181
|
|
|
182
182
|
@memory_app.command("ingest-embeddings")
|
|
@@ -212,7 +212,7 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
212
212
|
dry_run: bool = typer.Option(
|
|
213
213
|
False, "--dry-run", help="Print summary without writing to the database."
|
|
214
214
|
),
|
|
215
|
-
memory_db: Path = memory_db_option(),
|
|
215
|
+
memory_db: Path | None = memory_db_option(),
|
|
216
216
|
) -> None:
|
|
217
217
|
"""Convert Phoenix embedding exports into Domain Memory facts."""
|
|
218
218
|
|
|
@@ -292,7 +292,7 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
292
292
|
"--min-score",
|
|
293
293
|
help="최소 검증 점수 필터 (0.0~1.0).",
|
|
294
294
|
),
|
|
295
|
-
memory_db: Path = memory_db_option(),
|
|
295
|
+
memory_db: Path | None = memory_db_option(),
|
|
296
296
|
) -> None:
|
|
297
297
|
"""Search factual facts stored in domain memory."""
|
|
298
298
|
|
|
@@ -350,7 +350,7 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
350
350
|
"--min-success",
|
|
351
351
|
help="최소 성공률 필터 (0.0~1.0).",
|
|
352
352
|
),
|
|
353
|
-
memory_db: Path = memory_db_option(),
|
|
353
|
+
memory_db: Path | None = memory_db_option(),
|
|
354
354
|
) -> None:
|
|
355
355
|
"""List reusable behaviors from domain memory."""
|
|
356
356
|
|
|
@@ -401,7 +401,7 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
401
401
|
domain: str = typer.Option("insurance", "--domain", "-d", help="도메인 이름."),
|
|
402
402
|
language: str = typer.Option("ko", "--language", "-l", help="언어 코드."),
|
|
403
403
|
limit: int = typer.Option(5, "--limit", "-n", help="최대 결과 수."),
|
|
404
|
-
memory_db: Path = memory_db_option(),
|
|
404
|
+
memory_db: Path | None = memory_db_option(),
|
|
405
405
|
) -> None:
|
|
406
406
|
"""Display experiential learning entries stored in memory."""
|
|
407
407
|
|
|
@@ -444,7 +444,7 @@ def create_domain_app(console: Console) -> typer.Typer:
|
|
|
444
444
|
"-y",
|
|
445
445
|
help="확인 프롬프트를 건너뜁니다.",
|
|
446
446
|
),
|
|
447
|
-
memory_db: Path = memory_db_option(),
|
|
447
|
+
memory_db: Path | None = memory_db_option(),
|
|
448
448
|
) -> None:
|
|
449
449
|
"""Run consolidation/cleanup on stored memories."""
|
|
450
450
|
|
|
@@ -8,7 +8,8 @@ import typer
|
|
|
8
8
|
from rich.console import Console
|
|
9
9
|
from rich.table import Table
|
|
10
10
|
|
|
11
|
-
from evalvault.adapters.outbound.storage.
|
|
11
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
12
|
+
from evalvault.config.settings import Settings
|
|
12
13
|
from evalvault.domain.services.experiment_manager import ExperimentManager
|
|
13
14
|
|
|
14
15
|
from ..utils.options import db_option
|
|
@@ -48,7 +49,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
48
49
|
validate_choice(retriever_name, ["bm25", "dense", "hybrid", "graphrag"], console)
|
|
49
50
|
|
|
50
51
|
console.print("\n[bold]Creating Experiment[/bold]\n")
|
|
51
|
-
storage =
|
|
52
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
52
53
|
manager = ExperimentManager(storage)
|
|
53
54
|
metric_list = parse_csv_option(metrics)
|
|
54
55
|
metric_list = metric_list or None
|
|
@@ -88,7 +89,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
88
89
|
) -> None:
|
|
89
90
|
"""Add a group to an experiment."""
|
|
90
91
|
|
|
91
|
-
storage =
|
|
92
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
92
93
|
manager = ExperimentManager(storage)
|
|
93
94
|
try:
|
|
94
95
|
manager.add_group_to_experiment(experiment_id, group_name, description)
|
|
@@ -108,7 +109,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
108
109
|
) -> None:
|
|
109
110
|
"""Add an evaluation run to an experiment group."""
|
|
110
111
|
|
|
111
|
-
storage =
|
|
112
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
112
113
|
manager = ExperimentManager(storage)
|
|
113
114
|
try:
|
|
114
115
|
manager.add_run_to_experiment_group(experiment_id, group_name, run_id)
|
|
@@ -132,7 +133,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
132
133
|
"""List experiments."""
|
|
133
134
|
|
|
134
135
|
console.print("\n[bold]Experiments[/bold]\n")
|
|
135
|
-
storage =
|
|
136
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
136
137
|
manager = ExperimentManager(storage)
|
|
137
138
|
experiments = manager.list_experiments(status=status)
|
|
138
139
|
if not experiments:
|
|
@@ -172,7 +173,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
172
173
|
"""Compare groups inside an experiment."""
|
|
173
174
|
|
|
174
175
|
console.print("\n[bold]Experiment Comparison[/bold]\n")
|
|
175
|
-
storage =
|
|
176
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
176
177
|
manager = ExperimentManager(storage)
|
|
177
178
|
try:
|
|
178
179
|
experiment = manager.get_experiment(experiment_id)
|
|
@@ -222,7 +223,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
222
223
|
) -> None:
|
|
223
224
|
"""Conclude an experiment and record findings."""
|
|
224
225
|
|
|
225
|
-
storage =
|
|
226
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
226
227
|
manager = ExperimentManager(storage)
|
|
227
228
|
try:
|
|
228
229
|
manager.conclude_experiment(experiment_id, conclusion)
|
|
@@ -239,7 +240,7 @@ def register_experiment_commands(app: typer.Typer, console: Console) -> None:
|
|
|
239
240
|
) -> None:
|
|
240
241
|
"""Show experiment summary."""
|
|
241
242
|
|
|
242
|
-
storage =
|
|
243
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
243
244
|
manager = ExperimentManager(storage)
|
|
244
245
|
try:
|
|
245
246
|
summary = manager.get_summary(experiment_id)
|
|
@@ -9,8 +9,9 @@ import typer
|
|
|
9
9
|
from rich.console import Console
|
|
10
10
|
from rich.table import Table
|
|
11
11
|
|
|
12
|
-
from evalvault.adapters.outbound.storage.
|
|
12
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
13
13
|
from evalvault.config.phoenix_support import get_phoenix_trace_url
|
|
14
|
+
from evalvault.config.settings import Settings
|
|
14
15
|
|
|
15
16
|
from ..utils.formatters import format_diff, format_score, format_status
|
|
16
17
|
from ..utils.options import db_option
|
|
@@ -50,7 +51,7 @@ def register_gate_commands(app: typer.Typer, console: Console) -> None:
|
|
|
50
51
|
) -> None:
|
|
51
52
|
"""Quality gate check for CI/CD pipelines."""
|
|
52
53
|
|
|
53
|
-
storage =
|
|
54
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
54
55
|
|
|
55
56
|
try:
|
|
56
57
|
run = storage.get_run(run_id)
|
|
@@ -17,7 +17,7 @@ from evalvault.adapters.outbound.dataset import get_loader
|
|
|
17
17
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
18
18
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
19
19
|
from evalvault.adapters.outbound.retriever.graph_rag_adapter import GraphRAGAdapter
|
|
20
|
-
from evalvault.adapters.outbound.storage.
|
|
20
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
21
21
|
from evalvault.config.settings import Settings, apply_profile
|
|
22
22
|
from evalvault.domain.services.analysis_service import AnalysisService
|
|
23
23
|
from evalvault.domain.services.evaluator import RagasEvaluator
|
|
@@ -185,7 +185,7 @@ def create_graph_rag_app(console: Console) -> typer.Typer:
|
|
|
185
185
|
console.print(f"[green]Saved GraphRAG artifacts:[/green] {artifacts_path}")
|
|
186
186
|
|
|
187
187
|
if db_path is not None:
|
|
188
|
-
storage =
|
|
188
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
189
189
|
storage.save_run(result.baseline_run)
|
|
190
190
|
storage.save_run(result.graph_run)
|
|
191
191
|
console.print(f"[green]Saved baseline run:[/green] {result.baseline_run.run_id}")
|
|
@@ -4,13 +4,12 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import cast
|
|
8
7
|
|
|
9
8
|
import typer
|
|
10
9
|
from rich.console import Console
|
|
11
10
|
from rich.table import Table
|
|
12
11
|
|
|
13
|
-
from evalvault.adapters.outbound.storage.
|
|
12
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
14
13
|
from evalvault.config.phoenix_support import PhoenixExperimentResolver
|
|
15
14
|
from evalvault.config.settings import Settings
|
|
16
15
|
|
|
@@ -88,11 +87,7 @@ def register_history_commands(app: typer.Typer, console: Console) -> None:
|
|
|
88
87
|
"[red]Error:[/red] --mode must be one of: " + ", ".join(RUN_MODE_CHOICES)
|
|
89
88
|
)
|
|
90
89
|
raise typer.Exit(2)
|
|
91
|
-
|
|
92
|
-
if resolved_db_path is None:
|
|
93
|
-
console.print("[red]Error:[/red] Database path is not configured.")
|
|
94
|
-
raise typer.Exit(1)
|
|
95
|
-
storage = SQLiteStorageAdapter(db_path=cast(Path, resolved_db_path))
|
|
90
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
96
91
|
runs = storage.list_runs(limit=limit, dataset_name=dataset, model_name=model)
|
|
97
92
|
if normalized_mode:
|
|
98
93
|
runs = [
|
|
@@ -199,11 +194,7 @@ def register_history_commands(app: typer.Typer, console: Console) -> None:
|
|
|
199
194
|
"""
|
|
200
195
|
console.print(f"\n[bold]Exporting Run {run_id}[/bold]\n")
|
|
201
196
|
|
|
202
|
-
|
|
203
|
-
if resolved_db_path is None:
|
|
204
|
-
console.print("[red]Error:[/red] Database path is not configured.")
|
|
205
|
-
raise typer.Exit(1)
|
|
206
|
-
storage = SQLiteStorageAdapter(db_path=cast(Path, resolved_db_path))
|
|
197
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
207
198
|
|
|
208
199
|
try:
|
|
209
200
|
run = storage.get_run(run_id)
|
|
@@ -424,8 +424,7 @@ def create_method_app(console: Console) -> typer.Typer:
|
|
|
424
424
|
if eval_output:
|
|
425
425
|
_save_results(eval_output, result, console)
|
|
426
426
|
|
|
427
|
-
|
|
428
|
-
_save_to_db(db_path, result, console)
|
|
427
|
+
_save_to_db(db_path, result, console)
|
|
429
428
|
|
|
430
429
|
return method_app
|
|
431
430
|
|
|
@@ -7,7 +7,7 @@ import typer
|
|
|
7
7
|
from rich.console import Console
|
|
8
8
|
|
|
9
9
|
from evalvault.adapters.outbound.filesystem.ops_snapshot_writer import OpsSnapshotWriter
|
|
10
|
-
from evalvault.adapters.outbound.storage.
|
|
10
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
11
11
|
from evalvault.config.settings import Settings, apply_profile
|
|
12
12
|
from evalvault.domain.services.ops_snapshot_service import (
|
|
13
13
|
OpsSnapshotRequest,
|
|
@@ -57,7 +57,7 @@ def create_ops_app(console: Console) -> typer.Typer:
|
|
|
57
57
|
settings = apply_profile(settings, resolved_profile)
|
|
58
58
|
|
|
59
59
|
resolved_db_path = _resolve_storage_path(db_path)
|
|
60
|
-
storage =
|
|
60
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
61
61
|
writer = OpsSnapshotWriter()
|
|
62
62
|
service = OpsSnapshotService(
|
|
63
63
|
storage=storage,
|
|
@@ -11,7 +11,7 @@ import typer
|
|
|
11
11
|
from rich.panel import Panel
|
|
12
12
|
from rich.table import Table
|
|
13
13
|
|
|
14
|
-
from evalvault.adapters.outbound.storage.
|
|
14
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
15
15
|
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
16
16
|
from evalvault.config.settings import Settings
|
|
17
17
|
|
|
@@ -64,7 +64,7 @@ def register_pipeline_commands(app: typer.Typer, console) -> None:
|
|
|
64
64
|
console.print("[red]Error: Database path is not configured.[/red]")
|
|
65
65
|
raise typer.Exit(1)
|
|
66
66
|
|
|
67
|
-
storage =
|
|
67
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
68
68
|
llm_adapter = None
|
|
69
69
|
try:
|
|
70
70
|
llm_adapter = get_llm_adapter(settings)
|
|
@@ -10,7 +10,7 @@ import typer
|
|
|
10
10
|
from rich.console import Console
|
|
11
11
|
|
|
12
12
|
from evalvault.adapters.outbound.filesystem.difficulty_profile_writer import DifficultyProfileWriter
|
|
13
|
-
from evalvault.adapters.outbound.storage.
|
|
13
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
14
14
|
from evalvault.config.settings import Settings
|
|
15
15
|
from evalvault.domain.services.difficulty_profile_reporter import DifficultyProfileReporter
|
|
16
16
|
from evalvault.domain.services.difficulty_profiling_service import (
|
|
@@ -58,7 +58,7 @@ def register_profile_difficulty_commands(
|
|
|
58
58
|
concurrency: int | None = typer.Option(
|
|
59
59
|
None, "--concurrency", help="Max concurrency when parallel is enabled.", min=1
|
|
60
60
|
),
|
|
61
|
-
db_path: Path | None = db_option(help_text="
|
|
61
|
+
db_path: Path | None = db_option(help_text="DB path."),
|
|
62
62
|
) -> None:
|
|
63
63
|
if not dataset_name and not run_id:
|
|
64
64
|
print_cli_error(
|
|
@@ -75,15 +75,6 @@ def register_profile_difficulty_commands(
|
|
|
75
75
|
)
|
|
76
76
|
raise typer.Exit(1)
|
|
77
77
|
|
|
78
|
-
resolved_db_path = db_path or Settings().evalvault_db_path
|
|
79
|
-
if resolved_db_path is None:
|
|
80
|
-
print_cli_error(
|
|
81
|
-
console,
|
|
82
|
-
"DB 경로가 필요합니다.",
|
|
83
|
-
fixes=["--db 옵션으로 SQLite DB 경로를 지정하세요."],
|
|
84
|
-
)
|
|
85
|
-
raise typer.Exit(1)
|
|
86
|
-
|
|
87
78
|
metric_list = parse_csv_option(metrics)
|
|
88
79
|
if metric_list:
|
|
89
80
|
validate_choices(metric_list, available_metrics, console, value_label="metric")
|
|
@@ -94,7 +85,7 @@ def register_profile_difficulty_commands(
|
|
|
94
85
|
resolved_output = output_path or Path("reports") / "difficulty" / f"{prefix}.json"
|
|
95
86
|
resolved_artifacts_dir = artifacts_dir or resolved_output.parent / "artifacts" / prefix
|
|
96
87
|
|
|
97
|
-
storage =
|
|
88
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
98
89
|
writer = DifficultyProfileWriter()
|
|
99
90
|
reporter = DifficultyProfileReporter(writer)
|
|
100
91
|
service = DifficultyProfilingService(storage=storage, reporter=reporter)
|
|
@@ -13,7 +13,7 @@ from rich.table import Table
|
|
|
13
13
|
|
|
14
14
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
15
15
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
16
|
-
from evalvault.adapters.outbound.storage.
|
|
16
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
17
17
|
from evalvault.config.settings import Settings, apply_profile
|
|
18
18
|
from evalvault.domain.entities import Dataset, EvaluationRun, PromptSetBundle, TestCase
|
|
19
19
|
from evalvault.domain.services.evaluator import RagasEvaluator
|
|
@@ -42,17 +42,6 @@ def _bundle_to_role_map(bundle: PromptSetBundle) -> dict[str, dict[str, str]]:
|
|
|
42
42
|
return roles
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def _require_db_path(console: Console, db_path: Path | None) -> Path:
|
|
46
|
-
if db_path is None:
|
|
47
|
-
print_cli_error(
|
|
48
|
-
console,
|
|
49
|
-
"DB 경로가 필요합니다.",
|
|
50
|
-
fixes=["--db 옵션으로 SQLite DB 경로를 지정하세요."],
|
|
51
|
-
)
|
|
52
|
-
raise typer.Exit(1)
|
|
53
|
-
return db_path
|
|
54
|
-
|
|
55
|
-
|
|
56
45
|
def _default_role(bundle: PromptSetBundle) -> str | None:
|
|
57
46
|
for item in bundle.items:
|
|
58
47
|
if item.role == "system":
|
|
@@ -229,8 +218,7 @@ def create_prompts_app(console: Console) -> typer.Typer:
|
|
|
229
218
|
db_path: Path | None = db_option(help_text="Path to database file."),
|
|
230
219
|
) -> None:
|
|
231
220
|
"""Show prompt snapshots attached to a run."""
|
|
232
|
-
|
|
233
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db)
|
|
221
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
234
222
|
bundle = storage.get_prompt_set_for_run(run_id)
|
|
235
223
|
if not bundle:
|
|
236
224
|
console.print("[yellow]No prompt set found for this run.[/yellow]")
|
|
@@ -273,8 +261,7 @@ def create_prompts_app(console: Console) -> typer.Typer:
|
|
|
273
261
|
),
|
|
274
262
|
) -> None:
|
|
275
263
|
"""Compare prompt snapshots between two runs."""
|
|
276
|
-
|
|
277
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db)
|
|
264
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
278
265
|
bundle_a = storage.get_prompt_set_for_run(run_id_a)
|
|
279
266
|
bundle_b = storage.get_prompt_set_for_run(run_id_b)
|
|
280
267
|
|
|
@@ -462,8 +449,7 @@ def create_prompts_app(console: Console) -> typer.Typer:
|
|
|
462
449
|
) -> None:
|
|
463
450
|
"""Suggest prompt improvements by scoring candidate prompts."""
|
|
464
451
|
|
|
465
|
-
|
|
466
|
-
storage = SQLiteStorageAdapter(db_path=resolved_db)
|
|
452
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
467
453
|
|
|
468
454
|
try:
|
|
469
455
|
run = storage.get_run(run_id)
|
|
@@ -20,7 +20,8 @@ from evalvault.adapters.outbound.report.ci_report_formatter import (
|
|
|
20
20
|
from evalvault.adapters.outbound.report.pr_comment_formatter import (
|
|
21
21
|
format_ci_gate_pr_comment,
|
|
22
22
|
)
|
|
23
|
-
from evalvault.adapters.outbound.storage.
|
|
23
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
24
|
+
from evalvault.config.settings import Settings
|
|
24
25
|
from evalvault.domain.services.regression_gate_service import (
|
|
25
26
|
RegressionGateReport,
|
|
26
27
|
RegressionGateService,
|
|
@@ -133,7 +134,7 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
|
|
|
133
134
|
validate_choice(test, ["t-test", "mann-whitney"], console, value_label="test")
|
|
134
135
|
metric_list = parse_csv_option(metrics)
|
|
135
136
|
|
|
136
|
-
storage =
|
|
137
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
137
138
|
analysis_adapter = StatisticalAnalysisAdapter()
|
|
138
139
|
service = RegressionGateService(storage=storage, analysis_adapter=analysis_adapter)
|
|
139
140
|
|
|
@@ -243,7 +244,7 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
|
|
|
243
244
|
value_label="format",
|
|
244
245
|
)
|
|
245
246
|
|
|
246
|
-
storage =
|
|
247
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
247
248
|
analysis_adapter = StatisticalAnalysisAdapter()
|
|
248
249
|
service = RegressionGateService(storage=storage, analysis_adapter=analysis_adapter)
|
|
249
250
|
|
|
@@ -414,7 +415,7 @@ def register_regress_commands(app: typer.Typer, console: Console) -> None:
|
|
|
414
415
|
console.print("[red]Error:[/red] Database path is not configured.")
|
|
415
416
|
raise typer.Exit(1)
|
|
416
417
|
|
|
417
|
-
storage =
|
|
418
|
+
storage = build_storage_adapter(settings=Settings(), db_path=db_path)
|
|
418
419
|
|
|
419
420
|
if action == "set":
|
|
420
421
|
if not run_id:
|