evalvault-1.74.0-py3-none-any.whl → evalvault-1.76.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +127 -80
- evalvault/adapters/inbound/api/routers/calibration.py +9 -9
- evalvault/adapters/inbound/api/routers/chat.py +303 -17
- evalvault/adapters/inbound/api/routers/config.py +3 -1
- evalvault/adapters/inbound/api/routers/domain.py +10 -5
- evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
- evalvault/adapters/inbound/api/routers/runs.py +23 -4
- evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
- evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
- evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
- evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
- evalvault/adapters/inbound/cli/commands/compare.py +2 -7
- evalvault/adapters/inbound/cli/commands/debug.py +3 -2
- evalvault/adapters/inbound/cli/commands/domain.py +12 -12
- evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
- evalvault/adapters/inbound/cli/commands/gate.py +3 -2
- evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
- evalvault/adapters/inbound/cli/commands/history.py +3 -12
- evalvault/adapters/inbound/cli/commands/method.py +3 -4
- evalvault/adapters/inbound/cli/commands/ops.py +2 -2
- evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
- evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
- evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
- evalvault/adapters/inbound/cli/commands/regress.py +5 -4
- evalvault/adapters/inbound/cli/commands/run.py +188 -59
- evalvault/adapters/inbound/cli/commands/run_helpers.py +181 -70
- evalvault/adapters/inbound/cli/commands/stage.py +6 -25
- evalvault/adapters/inbound/cli/utils/options.py +10 -4
- evalvault/adapters/inbound/mcp/tools.py +11 -8
- evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
- evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
- evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
- evalvault/adapters/outbound/domain_memory/factory.py +68 -0
- evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
- evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
- evalvault/adapters/outbound/llm/factory.py +1 -1
- evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
- evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
- evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
- evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
- evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
- evalvault/adapters/outbound/storage/base_sql.py +3 -2
- evalvault/adapters/outbound/storage/factory.py +53 -0
- evalvault/adapters/outbound/storage/postgres_schema.sql +2 -0
- evalvault/adapters/outbound/tracker/mlflow_adapter.py +209 -54
- evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
- evalvault/config/instrumentation.py +8 -6
- evalvault/config/phoenix_support.py +5 -0
- evalvault/config/settings.py +71 -11
- evalvault/domain/services/domain_learning_hook.py +2 -1
- evalvault/domain/services/evaluator.py +2 -0
- evalvault/ports/inbound/web_port.py +3 -1
- evalvault/ports/outbound/storage_port.py +2 -0
- evalvault-1.76.0.dist-info/METADATA +221 -0
- {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/RECORD +58 -53
- evalvault-1.74.0.dist-info/METADATA +0 -585
- {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/WHEEL +0 -0
- {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.74.0.dist-info → evalvault-1.76.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -21,7 +21,7 @@ from evalvault.adapters.outbound.dataset import get_loader, load_multiturn_datas
|
|
|
21
21
|
from evalvault.adapters.outbound.documents.versioned_loader import (
|
|
22
22
|
load_versioned_chunks_from_pdf_dir,
|
|
23
23
|
)
|
|
24
|
-
from evalvault.adapters.outbound.domain_memory
|
|
24
|
+
from evalvault.adapters.outbound.domain_memory import build_domain_memory_adapter
|
|
25
25
|
from evalvault.adapters.outbound.llm import SettingsLLMFactory, get_llm_adapter
|
|
26
26
|
from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
|
|
27
27
|
from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
@@ -30,10 +30,10 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
|
30
30
|
PhoenixSyncService,
|
|
31
31
|
build_experiment_metadata,
|
|
32
32
|
)
|
|
33
|
-
from evalvault.adapters.outbound.storage.
|
|
33
|
+
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
34
34
|
from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
|
|
35
35
|
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
36
|
-
from evalvault.config.settings import Settings, apply_profile
|
|
36
|
+
from evalvault.config.settings import Settings, apply_profile, resolve_tracker_providers
|
|
37
37
|
from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
|
|
38
38
|
from evalvault.domain.entities.multiturn import (
|
|
39
39
|
MultiTurnConversationRecord,
|
|
@@ -57,6 +57,7 @@ from evalvault.domain.services.ragas_prompt_overrides import (
|
|
|
57
57
|
)
|
|
58
58
|
from evalvault.domain.services.retriever_context import apply_versioned_retriever_to_dataset
|
|
59
59
|
from evalvault.domain.services.stage_event_builder import StageEventBuilder
|
|
60
|
+
from evalvault.ports.outbound.domain_memory_port import DomainMemoryPort
|
|
60
61
|
from evalvault.ports.outbound.korean_nlp_port import RetrieverPort
|
|
61
62
|
|
|
62
63
|
from ..utils.analysis_io import (
|
|
@@ -85,7 +86,8 @@ from .run_helpers import (
|
|
|
85
86
|
_display_results,
|
|
86
87
|
_evaluate_streaming_run,
|
|
87
88
|
_is_oss_open_model,
|
|
88
|
-
|
|
89
|
+
_log_analysis_artifacts,
|
|
90
|
+
_log_to_trackers,
|
|
89
91
|
_option_was_provided,
|
|
90
92
|
_print_run_mode_banner,
|
|
91
93
|
_resolve_thresholds,
|
|
@@ -111,7 +113,7 @@ def _build_dense_retriever(
|
|
|
111
113
|
settings: Settings,
|
|
112
114
|
profile_name: str | None,
|
|
113
115
|
) -> Any:
|
|
114
|
-
"""Build and index a dense retriever, preferring
|
|
116
|
+
"""Build and index a dense retriever, preferring OpenAI-compatible embeddings when available."""
|
|
115
117
|
|
|
116
118
|
from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
|
|
117
119
|
|
|
@@ -135,6 +137,17 @@ def _build_dense_retriever(
|
|
|
135
137
|
dense_retriever.index(documents)
|
|
136
138
|
return dense_retriever
|
|
137
139
|
|
|
140
|
+
if settings.llm_provider == "vllm":
|
|
141
|
+
from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
|
|
142
|
+
|
|
143
|
+
adapter = VLLMAdapter(settings)
|
|
144
|
+
dense_retriever = KoreanDenseRetriever(
|
|
145
|
+
model_name=settings.vllm_embedding_model,
|
|
146
|
+
ollama_adapter=adapter,
|
|
147
|
+
)
|
|
148
|
+
dense_retriever.index(documents)
|
|
149
|
+
return dense_retriever
|
|
150
|
+
|
|
138
151
|
try:
|
|
139
152
|
dense_retriever = KoreanDenseRetriever()
|
|
140
153
|
dense_retriever.index(documents)
|
|
@@ -142,7 +155,8 @@ def _build_dense_retriever(
|
|
|
142
155
|
except Exception as exc:
|
|
143
156
|
raise RuntimeError(
|
|
144
157
|
"Dense retriever initialization failed. "
|
|
145
|
-
"Use --profile dev/prod (Ollama embedding),
|
|
158
|
+
"Use --profile dev/prod (Ollama embedding), --profile vllm (vLLM embedding), "
|
|
159
|
+
"or install/prepare a local embedding model."
|
|
146
160
|
) from exc
|
|
147
161
|
|
|
148
162
|
|
|
@@ -165,6 +179,14 @@ def _log_duration(
|
|
|
165
179
|
_log_timestamp(console, verbose, f"{message} ({elapsed:.2f}s)")
|
|
166
180
|
|
|
167
181
|
|
|
182
|
+
def _infer_phoenix_model_provider(model_name: str) -> str:
|
|
183
|
+
if not model_name:
|
|
184
|
+
return "OPENAI"
|
|
185
|
+
provider = model_name.split("/")[0].upper() if "/" in model_name else "OPENAI"
|
|
186
|
+
allowed = {"OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE", "DEEPSEEK", "XAI", "AWS", "OLLAMA"}
|
|
187
|
+
return provider if provider in allowed else "OPENAI"
|
|
188
|
+
|
|
189
|
+
|
|
168
190
|
def register_run_commands(
|
|
169
191
|
app: typer.Typer,
|
|
170
192
|
console: Console,
|
|
@@ -345,10 +367,13 @@ def register_run_commands(
|
|
|
345
367
|
help="Store stage events in the SQLite database (requires --db).",
|
|
346
368
|
),
|
|
347
369
|
tracker: str = typer.Option(
|
|
348
|
-
"
|
|
370
|
+
"mlflow+phoenix",
|
|
349
371
|
"--tracker",
|
|
350
372
|
"-t",
|
|
351
|
-
help=
|
|
373
|
+
help=(
|
|
374
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
375
|
+
"or combinations like 'mlflow+phoenix'."
|
|
376
|
+
),
|
|
352
377
|
rich_help_panel="Simple mode preset",
|
|
353
378
|
),
|
|
354
379
|
langfuse: bool = typer.Option(
|
|
@@ -654,13 +679,24 @@ def register_run_commands(
|
|
|
654
679
|
tracker_override = _option_was_provided(ctx, "tracker") or langfuse
|
|
655
680
|
selected_tracker = tracker
|
|
656
681
|
if preset.default_tracker:
|
|
657
|
-
if tracker_override
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
682
|
+
if tracker_override:
|
|
683
|
+
try:
|
|
684
|
+
providers = resolve_tracker_providers(tracker)
|
|
685
|
+
except ValueError as exc:
|
|
686
|
+
print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
|
|
687
|
+
raise typer.Exit(2) from exc
|
|
688
|
+
if providers == ["none"]:
|
|
689
|
+
selected_tracker = preset.default_tracker
|
|
690
|
+
elif preset.default_tracker not in providers:
|
|
691
|
+
print_cli_warning(
|
|
692
|
+
console,
|
|
693
|
+
f"Simple 모드는 tracker에 {preset.default_tracker}가 포함되어야 합니다.",
|
|
694
|
+
tips=["다른 Tracker를 사용하려면 --mode full을 사용하세요."],
|
|
695
|
+
)
|
|
696
|
+
providers.append(preset.default_tracker)
|
|
697
|
+
selected_tracker = "+".join(providers)
|
|
698
|
+
else:
|
|
699
|
+
selected_tracker = preset.default_tracker
|
|
664
700
|
tracker = selected_tracker
|
|
665
701
|
|
|
666
702
|
prompt_manifest_value = prompt_manifest
|
|
@@ -1633,10 +1669,29 @@ def register_run_commands(
|
|
|
1633
1669
|
)
|
|
1634
1670
|
raise typer.Exit(2) from exc
|
|
1635
1671
|
|
|
1672
|
+
effective_tracker = tracker
|
|
1673
|
+
if langfuse and tracker == "none" and not preset.default_tracker:
|
|
1674
|
+
effective_tracker = "langfuse"
|
|
1675
|
+
print_cli_warning(
|
|
1676
|
+
console,
|
|
1677
|
+
"--langfuse 플래그는 곧 제거됩니다.",
|
|
1678
|
+
tips=["대신 --tracker langfuse를 사용하세요."],
|
|
1679
|
+
)
|
|
1680
|
+
|
|
1681
|
+
try:
|
|
1682
|
+
effective_providers = resolve_tracker_providers(effective_tracker)
|
|
1683
|
+
except ValueError as exc:
|
|
1684
|
+
print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
|
|
1685
|
+
raise typer.Exit(2) from exc
|
|
1686
|
+
|
|
1636
1687
|
phoenix_dataset_name = phoenix_dataset
|
|
1637
1688
|
if phoenix_experiment and not phoenix_dataset_name:
|
|
1638
1689
|
phoenix_dataset_name = f"{ds.name}:{ds.version}"
|
|
1639
1690
|
|
|
1691
|
+
auto_phoenix_sync = "phoenix" in effective_providers
|
|
1692
|
+
if auto_phoenix_sync and not phoenix_dataset_name:
|
|
1693
|
+
phoenix_dataset_name = f"{ds.name}:{ds.version}"
|
|
1694
|
+
|
|
1640
1695
|
phoenix_dataset_description_value = phoenix_dataset_description
|
|
1641
1696
|
if phoenix_dataset_name and not phoenix_dataset_description_value:
|
|
1642
1697
|
desc_source = ds.metadata.get("description") if isinstance(ds.metadata, dict) else None
|
|
@@ -1646,13 +1701,20 @@ def register_run_commands(
|
|
|
1646
1701
|
phoenix_dataset_result: dict[str, Any] | None = None
|
|
1647
1702
|
phoenix_experiment_result: dict[str, Any] | None = None
|
|
1648
1703
|
|
|
1649
|
-
if phoenix_dataset_name or phoenix_experiment:
|
|
1704
|
+
if phoenix_dataset_name or phoenix_experiment or auto_phoenix_sync:
|
|
1650
1705
|
try:
|
|
1651
1706
|
phoenix_sync_service = PhoenixSyncService(
|
|
1652
1707
|
endpoint=settings.phoenix_endpoint,
|
|
1653
1708
|
api_token=getattr(settings, "phoenix_api_token", None),
|
|
1654
1709
|
)
|
|
1655
1710
|
except PhoenixSyncError as exc:
|
|
1711
|
+
if auto_phoenix_sync:
|
|
1712
|
+
print_cli_error(
|
|
1713
|
+
console,
|
|
1714
|
+
"Phoenix Sync 서비스를 초기화할 수 없습니다.",
|
|
1715
|
+
details=str(exc),
|
|
1716
|
+
)
|
|
1717
|
+
raise typer.Exit(2) from exc
|
|
1656
1718
|
print_cli_warning(
|
|
1657
1719
|
console,
|
|
1658
1720
|
"Phoenix Sync 서비스를 초기화할 수 없습니다.",
|
|
@@ -1660,19 +1722,10 @@ def register_run_commands(
|
|
|
1660
1722
|
)
|
|
1661
1723
|
phoenix_sync_service = None
|
|
1662
1724
|
|
|
1663
|
-
effective_tracker = tracker
|
|
1664
|
-
if langfuse and tracker == "none" and not preset.default_tracker:
|
|
1665
|
-
effective_tracker = "langfuse"
|
|
1666
|
-
print_cli_warning(
|
|
1667
|
-
console,
|
|
1668
|
-
"--langfuse 플래그는 곧 제거됩니다.",
|
|
1669
|
-
tips=["대신 --tracker langfuse를 사용하세요."],
|
|
1670
|
-
)
|
|
1671
|
-
|
|
1672
1725
|
config_wants_phoenix = getattr(settings, "phoenix_enabled", False)
|
|
1673
1726
|
if not isinstance(config_wants_phoenix, bool):
|
|
1674
1727
|
config_wants_phoenix = False
|
|
1675
|
-
should_enable_phoenix =
|
|
1728
|
+
should_enable_phoenix = "phoenix" in effective_providers or config_wants_phoenix
|
|
1676
1729
|
if should_enable_phoenix:
|
|
1677
1730
|
ensure_phoenix_instrumentation(settings, console=console, force=True)
|
|
1678
1731
|
|
|
@@ -1729,7 +1782,7 @@ def register_run_commands(
|
|
|
1729
1782
|
|
|
1730
1783
|
assert llm_adapter is not None
|
|
1731
1784
|
|
|
1732
|
-
memory_adapter:
|
|
1785
|
+
memory_adapter: DomainMemoryPort | None = None
|
|
1733
1786
|
memory_evaluator: MemoryAwareEvaluator | None = None
|
|
1734
1787
|
memory_domain_name = memory_domain or ds.metadata.get("domain") or "default"
|
|
1735
1788
|
memory_required = domain_memory_requested
|
|
@@ -1751,8 +1804,15 @@ def register_run_commands(
|
|
|
1751
1804
|
f"Domain Memory 초기화 시작 (domain={memory_domain_name}, lang={memory_language})",
|
|
1752
1805
|
)
|
|
1753
1806
|
try:
|
|
1754
|
-
|
|
1755
|
-
|
|
1807
|
+
if memory_db:
|
|
1808
|
+
memory_db_path = memory_db
|
|
1809
|
+
elif settings.db_backend == "sqlite":
|
|
1810
|
+
memory_db_path = settings.evalvault_memory_db_path
|
|
1811
|
+
else:
|
|
1812
|
+
memory_db_path = None
|
|
1813
|
+
memory_adapter = build_domain_memory_adapter(
|
|
1814
|
+
settings=settings, db_path=Path(memory_db_path) if memory_db_path else None
|
|
1815
|
+
)
|
|
1756
1816
|
memory_evaluator = MemoryAwareEvaluator(
|
|
1757
1817
|
evaluator=evaluator,
|
|
1758
1818
|
memory_port=memory_adapter,
|
|
@@ -2012,6 +2072,9 @@ def register_run_commands(
|
|
|
2012
2072
|
)
|
|
2013
2073
|
if prompt_bundle:
|
|
2014
2074
|
result.tracker_metadata["prompt_set"] = build_prompt_summary(prompt_bundle)
|
|
2075
|
+
result.tracker_metadata["prompt_set_detail"] = prompt_bundle.to_dict(
|
|
2076
|
+
include_content=True
|
|
2077
|
+
)
|
|
2015
2078
|
|
|
2016
2079
|
if retriever_instance or used_versioned_prefill:
|
|
2017
2080
|
retriever_tracker_meta: dict[str, Any] = {
|
|
@@ -2085,13 +2148,29 @@ def register_run_commands(
|
|
|
2085
2148
|
)
|
|
2086
2149
|
console.print(f"[dim]View datasets: {dataset_info.url}[/dim]")
|
|
2087
2150
|
except PhoenixSyncError as exc:
|
|
2151
|
+
if auto_phoenix_sync:
|
|
2152
|
+
print_cli_error(
|
|
2153
|
+
console,
|
|
2154
|
+
"Phoenix Dataset 업로드에 실패했습니다.",
|
|
2155
|
+
details=str(exc),
|
|
2156
|
+
)
|
|
2157
|
+
raise typer.Exit(2) from exc
|
|
2088
2158
|
print_cli_warning(
|
|
2089
2159
|
console,
|
|
2090
2160
|
"Phoenix Dataset 업로드에 실패했습니다.",
|
|
2091
2161
|
tips=[str(exc)],
|
|
2092
2162
|
)
|
|
2163
|
+
if auto_phoenix_sync and not phoenix_experiment:
|
|
2164
|
+
phoenix_experiment = f"{result.model_name}-{result.run_id[:8]}"
|
|
2093
2165
|
if phoenix_experiment:
|
|
2094
2166
|
if not phoenix_dataset_result:
|
|
2167
|
+
if auto_phoenix_sync:
|
|
2168
|
+
print_cli_error(
|
|
2169
|
+
console,
|
|
2170
|
+
"Dataset 업로드에 실패해 Phoenix Experiment 생성을 진행할 수 없습니다.",
|
|
2171
|
+
details="Phoenix dataset 업로드가 필요합니다.",
|
|
2172
|
+
)
|
|
2173
|
+
raise typer.Exit(2)
|
|
2095
2174
|
print_cli_warning(
|
|
2096
2175
|
console,
|
|
2097
2176
|
"Dataset 업로드에 실패해 Phoenix Experiment 생성을 건너뜁니다.",
|
|
@@ -2149,6 +2228,41 @@ def register_run_commands(
|
|
|
2149
2228
|
phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
|
|
2150
2229
|
phoenix_meta.setdefault("schema_version", 2)
|
|
2151
2230
|
phoenix_meta["prompts"] = prompt_metadata_entries
|
|
2231
|
+
if phoenix_sync_service and "phoenix" in effective_providers:
|
|
2232
|
+
try:
|
|
2233
|
+
prompt_set_summary = result.tracker_metadata.get("prompt_set") or {}
|
|
2234
|
+
prompt_set_name = prompt_set_summary.get("prompt_set_name")
|
|
2235
|
+
prompt_entries = list(prompt_metadata_entries)
|
|
2236
|
+
prompt_set_detail = result.tracker_metadata.get("prompt_set_detail")
|
|
2237
|
+
if isinstance(prompt_set_detail, dict):
|
|
2238
|
+
for item in prompt_set_detail.get("items", []):
|
|
2239
|
+
prompt = item.get("prompt") or {}
|
|
2240
|
+
if not isinstance(prompt, dict):
|
|
2241
|
+
continue
|
|
2242
|
+
prompt_entries.append(
|
|
2243
|
+
{
|
|
2244
|
+
"name": prompt.get("name"),
|
|
2245
|
+
"role": item.get("role"),
|
|
2246
|
+
"kind": prompt.get("kind"),
|
|
2247
|
+
"checksum": prompt.get("checksum"),
|
|
2248
|
+
"content": prompt.get("content"),
|
|
2249
|
+
"source": prompt.get("source"),
|
|
2250
|
+
}
|
|
2251
|
+
)
|
|
2252
|
+
synced = phoenix_sync_service.sync_prompts(
|
|
2253
|
+
prompt_entries=prompt_entries,
|
|
2254
|
+
model_name=result.model_name,
|
|
2255
|
+
model_provider=_infer_phoenix_model_provider(result.model_name),
|
|
2256
|
+
prompt_set_name=prompt_set_name,
|
|
2257
|
+
)
|
|
2258
|
+
if synced:
|
|
2259
|
+
phoenix_meta["prompts"] = synced
|
|
2260
|
+
except PhoenixSyncError as exc:
|
|
2261
|
+
print_cli_warning(
|
|
2262
|
+
console,
|
|
2263
|
+
"Phoenix Prompt 동기화에 실패했습니다.",
|
|
2264
|
+
tips=[str(exc)],
|
|
2265
|
+
)
|
|
2152
2266
|
|
|
2153
2267
|
if stage_events or stage_store:
|
|
2154
2268
|
stage_event_builder = StageEventBuilder()
|
|
@@ -2161,20 +2275,13 @@ def register_run_commands(
|
|
|
2161
2275
|
stored = _write_stage_events_jsonl(stage_events, stage_event_payload)
|
|
2162
2276
|
console.print(f"[green]Saved {stored} stage event(s).[/green]")
|
|
2163
2277
|
if stage_store:
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
2168
|
-
else:
|
|
2169
|
-
print_cli_warning(
|
|
2170
|
-
console,
|
|
2171
|
-
"Stage 이벤트를 저장하려면 --db 경로가 필요합니다.",
|
|
2172
|
-
tips=["--db <sqlite_path> 옵션을 함께 지정하세요."],
|
|
2173
|
-
)
|
|
2278
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2279
|
+
stored = storage.save_stage_events(stage_event_payload)
|
|
2280
|
+
console.print(f"[green]Stored {stored} stage event(s).[/green]")
|
|
2174
2281
|
|
|
2175
2282
|
if effective_tracker != "none":
|
|
2176
2283
|
phoenix_opts = None
|
|
2177
|
-
if
|
|
2284
|
+
if "phoenix" in effective_providers:
|
|
2178
2285
|
phoenix_opts = {
|
|
2179
2286
|
"max_traces": phoenix_max_traces,
|
|
2180
2287
|
"metadata": phoenix_trace_metadata or None,
|
|
@@ -2185,7 +2292,7 @@ def register_run_commands(
|
|
|
2185
2292
|
verbose,
|
|
2186
2293
|
f"Tracker 로깅 시작 ({effective_tracker})",
|
|
2187
2294
|
)
|
|
2188
|
-
|
|
2295
|
+
_log_to_trackers(
|
|
2189
2296
|
settings,
|
|
2190
2297
|
result,
|
|
2191
2298
|
console,
|
|
@@ -2194,23 +2301,21 @@ def register_run_commands(
|
|
|
2194
2301
|
log_phoenix_traces_fn=log_phoenix_traces,
|
|
2195
2302
|
)
|
|
2196
2303
|
_log_duration(console, verbose, "Tracker 로깅 완료", tracker_started_at)
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
)
|
|
2208
|
-
_log_duration(console, verbose, "DB 저장 완료", db_started_at)
|
|
2304
|
+
db_started_at = datetime.now()
|
|
2305
|
+
_log_timestamp(console, verbose, "DB 저장 시작")
|
|
2306
|
+
_save_to_db(
|
|
2307
|
+
db_path,
|
|
2308
|
+
result,
|
|
2309
|
+
console,
|
|
2310
|
+
prompt_bundle=prompt_bundle,
|
|
2311
|
+
export_excel=excel_output is None,
|
|
2312
|
+
)
|
|
2313
|
+
_log_duration(console, verbose, "DB 저장 완료", db_started_at)
|
|
2209
2314
|
if excel_output:
|
|
2210
2315
|
excel_started_at = datetime.now()
|
|
2211
2316
|
_log_timestamp(console, verbose, f"엑셀 저장 시작 ({excel_output})")
|
|
2212
2317
|
try:
|
|
2213
|
-
storage =
|
|
2318
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2214
2319
|
storage.export_run_to_excel(result.run_id, excel_output)
|
|
2215
2320
|
console.print(f"[green]Excel export saved: {excel_output}[/green]")
|
|
2216
2321
|
except Exception as exc:
|
|
@@ -2242,7 +2347,7 @@ def register_run_commands(
|
|
|
2242
2347
|
prefix=analysis_prefix,
|
|
2243
2348
|
)
|
|
2244
2349
|
console.print("\n[bold]자동 분석 실행[/bold]")
|
|
2245
|
-
storage =
|
|
2350
|
+
storage = build_storage_adapter(settings=settings, db_path=db_path)
|
|
2246
2351
|
pipeline_service = build_analysis_pipeline_service(
|
|
2247
2352
|
storage=storage,
|
|
2248
2353
|
llm_adapter=llm_adapter,
|
|
@@ -2265,6 +2370,12 @@ def register_run_commands(
|
|
|
2265
2370
|
pipeline_result,
|
|
2266
2371
|
artifacts_dir=artifacts_dir,
|
|
2267
2372
|
)
|
|
2373
|
+
result.tracker_metadata["analysis_artifacts"] = {
|
|
2374
|
+
"dir": artifact_index.get("dir"),
|
|
2375
|
+
"index": artifact_index.get("index"),
|
|
2376
|
+
"output": str(analysis_output_path),
|
|
2377
|
+
"report": str(analysis_report_path),
|
|
2378
|
+
}
|
|
2268
2379
|
payload = serialize_pipeline_result(pipeline_result)
|
|
2269
2380
|
payload["run_id"] = result.run_id
|
|
2270
2381
|
payload["artifacts"] = artifact_index
|
|
@@ -2281,6 +2392,18 @@ def register_run_commands(
|
|
|
2281
2392
|
"[green]자동 분석 상세 결과 저장:[/green] "
|
|
2282
2393
|
f"{artifact_index['dir']} (index: {artifact_index['index']})\n"
|
|
2283
2394
|
)
|
|
2395
|
+
if effective_tracker != "none":
|
|
2396
|
+
_log_analysis_artifacts(
|
|
2397
|
+
settings,
|
|
2398
|
+
result,
|
|
2399
|
+
console,
|
|
2400
|
+
effective_tracker,
|
|
2401
|
+
analysis_payload=payload,
|
|
2402
|
+
artifact_index=artifact_index,
|
|
2403
|
+
report_text=report_text,
|
|
2404
|
+
output_path=analysis_output_path,
|
|
2405
|
+
report_path=analysis_report_path,
|
|
2406
|
+
)
|
|
2284
2407
|
|
|
2285
2408
|
@app.command(
|
|
2286
2409
|
name="run-simple",
|
|
@@ -2384,10 +2507,13 @@ def register_run_commands(
|
|
|
2384
2507
|
help="Store stage events in the SQLite database (requires --db).",
|
|
2385
2508
|
),
|
|
2386
2509
|
tracker: str = typer.Option(
|
|
2387
|
-
"
|
|
2510
|
+
"mlflow+phoenix",
|
|
2388
2511
|
"--tracker",
|
|
2389
2512
|
"-t",
|
|
2390
|
-
help=
|
|
2513
|
+
help=(
|
|
2514
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
2515
|
+
"or combinations like 'mlflow+phoenix'."
|
|
2516
|
+
),
|
|
2391
2517
|
),
|
|
2392
2518
|
langfuse: bool = typer.Option(
|
|
2393
2519
|
False,
|
|
@@ -2676,10 +2802,13 @@ def register_run_commands(
|
|
|
2676
2802
|
help="Store stage events in the SQLite database (requires --db).",
|
|
2677
2803
|
),
|
|
2678
2804
|
tracker: str = typer.Option(
|
|
2679
|
-
"
|
|
2805
|
+
"mlflow+phoenix",
|
|
2680
2806
|
"--tracker",
|
|
2681
2807
|
"-t",
|
|
2682
|
-
help=
|
|
2808
|
+
help=(
|
|
2809
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
2810
|
+
"or combinations like 'mlflow+phoenix'."
|
|
2811
|
+
),
|
|
2683
2812
|
),
|
|
2684
2813
|
langfuse: bool = typer.Option(
|
|
2685
2814
|
False,
|