evalvault 1.75.0__py3-none-any.whl → 1.76.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +99 -63
- evalvault/adapters/inbound/api/routers/config.py +3 -1
- evalvault/adapters/inbound/cli/commands/method.py +2 -2
- evalvault/adapters/inbound/cli/commands/run.py +146 -28
- evalvault/adapters/inbound/cli/commands/run_helpers.py +157 -55
- evalvault/adapters/outbound/llm/factory.py +1 -1
- evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
- evalvault/adapters/outbound/tracker/mlflow_adapter.py +209 -54
- evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
- evalvault/config/instrumentation.py +8 -6
- evalvault/config/phoenix_support.py +5 -0
- evalvault/config/settings.py +40 -4
- evalvault/domain/services/evaluator.py +2 -0
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/METADATA +1 -1
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/RECORD +18 -18
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/WHEEL +0 -0
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -24,7 +24,7 @@ from evalvault.adapters.outbound.judge_calibration_reporter import JudgeCalibrat
|
|
|
24
24
|
from evalvault.adapters.outbound.ops.report_renderer import render_json, render_markdown
|
|
25
25
|
from evalvault.adapters.outbound.report import MarkdownReportAdapter
|
|
26
26
|
from evalvault.config.phoenix_support import PhoenixExperimentResolver
|
|
27
|
-
from evalvault.config.settings import Settings
|
|
27
|
+
from evalvault.config.settings import Settings, resolve_tracker_providers
|
|
28
28
|
from evalvault.domain.entities import (
|
|
29
29
|
CalibrationResult,
|
|
30
30
|
FeedbackSummary,
|
|
@@ -217,56 +217,83 @@ class WebUIAdapter:
|
|
|
217
217
|
logger.warning(f"Failed to create LLM adapter for {model_id}: {e}, using default")
|
|
218
218
|
return self._llm_adapter
|
|
219
219
|
|
|
220
|
-
def
|
|
220
|
+
def _get_trackers(
|
|
221
221
|
self,
|
|
222
222
|
settings: Settings,
|
|
223
223
|
tracker_config: dict[str, Any] | None,
|
|
224
|
-
) -> tuple[str
|
|
225
|
-
provider = (tracker_config or {}).get("provider") or "none"
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
224
|
+
) -> list[tuple[str, Any]]:
|
|
225
|
+
provider = (tracker_config or {}).get("provider") or settings.tracker_provider or "none"
|
|
226
|
+
providers = resolve_tracker_providers(provider)
|
|
227
|
+
if not providers or providers == ["none"]:
|
|
228
|
+
return []
|
|
229
|
+
required = {"mlflow", "phoenix"}
|
|
230
|
+
if not required.issubset(set(providers)):
|
|
231
|
+
raise RuntimeError("Tracker must include both mlflow and phoenix")
|
|
232
|
+
|
|
233
|
+
trackers: list[tuple[str, Any]] = []
|
|
234
|
+
for entry in providers:
|
|
235
|
+
if entry == "langfuse":
|
|
236
|
+
if not settings.langfuse_public_key or not settings.langfuse_secret_key:
|
|
237
|
+
raise RuntimeError("Langfuse credentials missing")
|
|
238
|
+
from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
|
|
239
|
+
|
|
240
|
+
trackers.append(
|
|
241
|
+
(
|
|
242
|
+
entry,
|
|
243
|
+
LangfuseAdapter(
|
|
244
|
+
public_key=settings.langfuse_public_key,
|
|
245
|
+
secret_key=settings.langfuse_secret_key,
|
|
246
|
+
host=settings.langfuse_host,
|
|
247
|
+
),
|
|
248
|
+
)
|
|
249
|
+
)
|
|
250
|
+
continue
|
|
242
251
|
|
|
243
|
-
|
|
244
|
-
|
|
252
|
+
if entry == "phoenix":
|
|
253
|
+
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
245
254
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
255
|
+
ensure_phoenix_instrumentation(settings, force=True)
|
|
256
|
+
try:
|
|
257
|
+
from evalvault.adapters.outbound.tracker.phoenix_adapter import PhoenixAdapter
|
|
258
|
+
except ImportError as exc:
|
|
259
|
+
raise RuntimeError("Phoenix extras not installed") from exc
|
|
260
|
+
trackers.append(
|
|
261
|
+
(
|
|
262
|
+
entry,
|
|
263
|
+
PhoenixAdapter(
|
|
264
|
+
endpoint=settings.phoenix_endpoint,
|
|
265
|
+
project_name=getattr(settings, "phoenix_project_name", None),
|
|
266
|
+
annotations_enabled=getattr(
|
|
267
|
+
settings,
|
|
268
|
+
"phoenix_annotations_enabled",
|
|
269
|
+
True,
|
|
270
|
+
),
|
|
271
|
+
),
|
|
272
|
+
)
|
|
273
|
+
)
|
|
274
|
+
continue
|
|
275
|
+
|
|
276
|
+
if entry == "mlflow":
|
|
277
|
+
if not settings.mlflow_tracking_uri:
|
|
278
|
+
raise RuntimeError("MLflow tracking URI missing")
|
|
279
|
+
try:
|
|
280
|
+
from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
|
|
281
|
+
except ImportError as exc:
|
|
282
|
+
raise RuntimeError("MLflow adapter unavailable") from exc
|
|
283
|
+
trackers.append(
|
|
284
|
+
(
|
|
285
|
+
entry,
|
|
286
|
+
MLflowAdapter(
|
|
287
|
+
tracking_uri=settings.mlflow_tracking_uri,
|
|
288
|
+
experiment_name=settings.mlflow_experiment_name,
|
|
289
|
+
),
|
|
290
|
+
)
|
|
291
|
+
)
|
|
292
|
+
continue
|
|
267
293
|
|
|
268
|
-
|
|
269
|
-
|
|
294
|
+
raise RuntimeError(f"Unknown tracker provider: {entry}")
|
|
295
|
+
|
|
296
|
+
return trackers
|
|
270
297
|
|
|
271
298
|
@staticmethod
|
|
272
299
|
def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
|
|
@@ -425,7 +452,11 @@ class WebUIAdapter:
|
|
|
425
452
|
dataset.metadata["domain"] = requested_domain
|
|
426
453
|
|
|
427
454
|
settings = self._settings or Settings()
|
|
428
|
-
|
|
455
|
+
try:
|
|
456
|
+
trackers = self._get_trackers(settings, request.tracker_config)
|
|
457
|
+
except RuntimeError as exc:
|
|
458
|
+
raise RuntimeError(f"Tracker configuration error: {exc}") from exc
|
|
459
|
+
tracker_providers = [provider for provider, _ in trackers]
|
|
429
460
|
stage_store = bool(request.stage_store)
|
|
430
461
|
|
|
431
462
|
retriever_instance = None
|
|
@@ -484,7 +515,7 @@ class WebUIAdapter:
|
|
|
484
515
|
)
|
|
485
516
|
from evalvault.domain.services.memory_aware_evaluator import MemoryAwareEvaluator
|
|
486
517
|
|
|
487
|
-
tracer = PhoenixTracerAdapter() if
|
|
518
|
+
tracer = PhoenixTracerAdapter() if "phoenix" in tracker_providers else None
|
|
488
519
|
memory_adapter = build_domain_memory_adapter(
|
|
489
520
|
settings=self._settings,
|
|
490
521
|
db_path=Path(memory_db_path) if memory_db_path else None,
|
|
@@ -696,22 +727,27 @@ class WebUIAdapter:
|
|
|
696
727
|
str(request.threshold_profile).strip().lower()
|
|
697
728
|
)
|
|
698
729
|
|
|
699
|
-
if
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
730
|
+
if trackers:
|
|
731
|
+
result.tracker_metadata.setdefault("tracker_providers", tracker_providers)
|
|
732
|
+
for provider, tracker in trackers:
|
|
733
|
+
try:
|
|
734
|
+
trace_id = tracker.log_evaluation_run(result)
|
|
735
|
+
provider_meta = result.tracker_metadata.setdefault(provider, {})
|
|
736
|
+
if isinstance(provider_meta, dict):
|
|
737
|
+
provider_meta.setdefault("trace_id", trace_id)
|
|
738
|
+
if provider == "phoenix":
|
|
739
|
+
endpoint = settings.phoenix_endpoint or "http://localhost:6006/v1/traces"
|
|
740
|
+
phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
|
|
741
|
+
phoenix_meta.update(
|
|
742
|
+
{
|
|
743
|
+
"trace_id": trace_id,
|
|
744
|
+
"endpoint": endpoint,
|
|
745
|
+
"trace_url": self._build_phoenix_trace_url(endpoint, trace_id),
|
|
746
|
+
"schema_version": 2,
|
|
747
|
+
}
|
|
748
|
+
)
|
|
749
|
+
except Exception as exc:
|
|
750
|
+
raise RuntimeError(f"Tracker logging failed for {provider}: {exc}") from exc
|
|
715
751
|
|
|
716
752
|
if stage_store and self._storage and hasattr(self._storage, "save_stage_events"):
|
|
717
753
|
try:
|
|
@@ -71,7 +71,9 @@ class ConfigUpdateRequest(BaseModel):
|
|
|
71
71
|
phoenix_endpoint: str | None = None
|
|
72
72
|
phoenix_enabled: bool | None = None
|
|
73
73
|
phoenix_sample_rate: float | None = None
|
|
74
|
-
|
|
74
|
+
phoenix_project_name: str | None = None
|
|
75
|
+
phoenix_annotations_enabled: bool | None = None
|
|
76
|
+
tracker_provider: str | None = None
|
|
75
77
|
postgres_host: str | None = None
|
|
76
78
|
postgres_port: int | None = None
|
|
77
79
|
postgres_database: str | None = None
|
|
@@ -31,7 +31,7 @@ from ..utils.validators import parse_csv_option, validate_choices
|
|
|
31
31
|
from .run_helpers import (
|
|
32
32
|
_display_results,
|
|
33
33
|
_is_oss_open_model,
|
|
34
|
-
|
|
34
|
+
_log_to_trackers,
|
|
35
35
|
_resolve_thresholds,
|
|
36
36
|
_save_results,
|
|
37
37
|
_save_to_db,
|
|
@@ -419,7 +419,7 @@ def create_method_app(console: Console) -> typer.Typer:
|
|
|
419
419
|
_display_results(result, console)
|
|
420
420
|
|
|
421
421
|
if tracker and tracker != "none":
|
|
422
|
-
|
|
422
|
+
_log_to_trackers(settings, result, console, tracker_type=tracker)
|
|
423
423
|
|
|
424
424
|
if eval_output:
|
|
425
425
|
_save_results(eval_output, result, console)
|
|
@@ -33,7 +33,7 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
|
|
|
33
33
|
from evalvault.adapters.outbound.storage.factory import build_storage_adapter
|
|
34
34
|
from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
|
|
35
35
|
from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
|
|
36
|
-
from evalvault.config.settings import Settings, apply_profile
|
|
36
|
+
from evalvault.config.settings import Settings, apply_profile, resolve_tracker_providers
|
|
37
37
|
from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
|
|
38
38
|
from evalvault.domain.entities.multiturn import (
|
|
39
39
|
MultiTurnConversationRecord,
|
|
@@ -86,7 +86,8 @@ from .run_helpers import (
|
|
|
86
86
|
_display_results,
|
|
87
87
|
_evaluate_streaming_run,
|
|
88
88
|
_is_oss_open_model,
|
|
89
|
-
|
|
89
|
+
_log_analysis_artifacts,
|
|
90
|
+
_log_to_trackers,
|
|
90
91
|
_option_was_provided,
|
|
91
92
|
_print_run_mode_banner,
|
|
92
93
|
_resolve_thresholds,
|
|
@@ -178,6 +179,14 @@ def _log_duration(
|
|
|
178
179
|
_log_timestamp(console, verbose, f"{message} ({elapsed:.2f}s)")
|
|
179
180
|
|
|
180
181
|
|
|
182
|
+
def _infer_phoenix_model_provider(model_name: str) -> str:
|
|
183
|
+
if not model_name:
|
|
184
|
+
return "OPENAI"
|
|
185
|
+
provider = model_name.split("/")[0].upper() if "/" in model_name else "OPENAI"
|
|
186
|
+
allowed = {"OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE", "DEEPSEEK", "XAI", "AWS", "OLLAMA"}
|
|
187
|
+
return provider if provider in allowed else "OPENAI"
|
|
188
|
+
|
|
189
|
+
|
|
181
190
|
def register_run_commands(
|
|
182
191
|
app: typer.Typer,
|
|
183
192
|
console: Console,
|
|
@@ -358,10 +367,13 @@ def register_run_commands(
|
|
|
358
367
|
help="Store stage events in the SQLite database (requires --db).",
|
|
359
368
|
),
|
|
360
369
|
tracker: str = typer.Option(
|
|
361
|
-
"
|
|
370
|
+
"mlflow+phoenix",
|
|
362
371
|
"--tracker",
|
|
363
372
|
"-t",
|
|
364
|
-
help=
|
|
373
|
+
help=(
|
|
374
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
375
|
+
"or combinations like 'mlflow+phoenix'."
|
|
376
|
+
),
|
|
365
377
|
rich_help_panel="Simple mode preset",
|
|
366
378
|
),
|
|
367
379
|
langfuse: bool = typer.Option(
|
|
@@ -667,13 +679,24 @@ def register_run_commands(
|
|
|
667
679
|
tracker_override = _option_was_provided(ctx, "tracker") or langfuse
|
|
668
680
|
selected_tracker = tracker
|
|
669
681
|
if preset.default_tracker:
|
|
670
|
-
if tracker_override
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
682
|
+
if tracker_override:
|
|
683
|
+
try:
|
|
684
|
+
providers = resolve_tracker_providers(tracker)
|
|
685
|
+
except ValueError as exc:
|
|
686
|
+
print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
|
|
687
|
+
raise typer.Exit(2) from exc
|
|
688
|
+
if providers == ["none"]:
|
|
689
|
+
selected_tracker = preset.default_tracker
|
|
690
|
+
elif preset.default_tracker not in providers:
|
|
691
|
+
print_cli_warning(
|
|
692
|
+
console,
|
|
693
|
+
f"Simple 모드는 tracker에 {preset.default_tracker}가 포함되어야 합니다.",
|
|
694
|
+
tips=["다른 Tracker를 사용하려면 --mode full을 사용하세요."],
|
|
695
|
+
)
|
|
696
|
+
providers.append(preset.default_tracker)
|
|
697
|
+
selected_tracker = "+".join(providers)
|
|
698
|
+
else:
|
|
699
|
+
selected_tracker = preset.default_tracker
|
|
677
700
|
tracker = selected_tracker
|
|
678
701
|
|
|
679
702
|
prompt_manifest_value = prompt_manifest
|
|
@@ -1646,10 +1669,29 @@ def register_run_commands(
|
|
|
1646
1669
|
)
|
|
1647
1670
|
raise typer.Exit(2) from exc
|
|
1648
1671
|
|
|
1672
|
+
effective_tracker = tracker
|
|
1673
|
+
if langfuse and tracker == "none" and not preset.default_tracker:
|
|
1674
|
+
effective_tracker = "langfuse"
|
|
1675
|
+
print_cli_warning(
|
|
1676
|
+
console,
|
|
1677
|
+
"--langfuse 플래그는 곧 제거됩니다.",
|
|
1678
|
+
tips=["대신 --tracker langfuse를 사용하세요."],
|
|
1679
|
+
)
|
|
1680
|
+
|
|
1681
|
+
try:
|
|
1682
|
+
effective_providers = resolve_tracker_providers(effective_tracker)
|
|
1683
|
+
except ValueError as exc:
|
|
1684
|
+
print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
|
|
1685
|
+
raise typer.Exit(2) from exc
|
|
1686
|
+
|
|
1649
1687
|
phoenix_dataset_name = phoenix_dataset
|
|
1650
1688
|
if phoenix_experiment and not phoenix_dataset_name:
|
|
1651
1689
|
phoenix_dataset_name = f"{ds.name}:{ds.version}"
|
|
1652
1690
|
|
|
1691
|
+
auto_phoenix_sync = "phoenix" in effective_providers
|
|
1692
|
+
if auto_phoenix_sync and not phoenix_dataset_name:
|
|
1693
|
+
phoenix_dataset_name = f"{ds.name}:{ds.version}"
|
|
1694
|
+
|
|
1653
1695
|
phoenix_dataset_description_value = phoenix_dataset_description
|
|
1654
1696
|
if phoenix_dataset_name and not phoenix_dataset_description_value:
|
|
1655
1697
|
desc_source = ds.metadata.get("description") if isinstance(ds.metadata, dict) else None
|
|
@@ -1659,13 +1701,20 @@ def register_run_commands(
|
|
|
1659
1701
|
phoenix_dataset_result: dict[str, Any] | None = None
|
|
1660
1702
|
phoenix_experiment_result: dict[str, Any] | None = None
|
|
1661
1703
|
|
|
1662
|
-
if phoenix_dataset_name or phoenix_experiment:
|
|
1704
|
+
if phoenix_dataset_name or phoenix_experiment or auto_phoenix_sync:
|
|
1663
1705
|
try:
|
|
1664
1706
|
phoenix_sync_service = PhoenixSyncService(
|
|
1665
1707
|
endpoint=settings.phoenix_endpoint,
|
|
1666
1708
|
api_token=getattr(settings, "phoenix_api_token", None),
|
|
1667
1709
|
)
|
|
1668
1710
|
except PhoenixSyncError as exc:
|
|
1711
|
+
if auto_phoenix_sync:
|
|
1712
|
+
print_cli_error(
|
|
1713
|
+
console,
|
|
1714
|
+
"Phoenix Sync 서비스를 초기화할 수 없습니다.",
|
|
1715
|
+
details=str(exc),
|
|
1716
|
+
)
|
|
1717
|
+
raise typer.Exit(2) from exc
|
|
1669
1718
|
print_cli_warning(
|
|
1670
1719
|
console,
|
|
1671
1720
|
"Phoenix Sync 서비스를 초기화할 수 없습니다.",
|
|
@@ -1673,19 +1722,10 @@ def register_run_commands(
|
|
|
1673
1722
|
)
|
|
1674
1723
|
phoenix_sync_service = None
|
|
1675
1724
|
|
|
1676
|
-
effective_tracker = tracker
|
|
1677
|
-
if langfuse and tracker == "none" and not preset.default_tracker:
|
|
1678
|
-
effective_tracker = "langfuse"
|
|
1679
|
-
print_cli_warning(
|
|
1680
|
-
console,
|
|
1681
|
-
"--langfuse 플래그는 곧 제거됩니다.",
|
|
1682
|
-
tips=["대신 --tracker langfuse를 사용하세요."],
|
|
1683
|
-
)
|
|
1684
|
-
|
|
1685
1725
|
config_wants_phoenix = getattr(settings, "phoenix_enabled", False)
|
|
1686
1726
|
if not isinstance(config_wants_phoenix, bool):
|
|
1687
1727
|
config_wants_phoenix = False
|
|
1688
|
-
should_enable_phoenix =
|
|
1728
|
+
should_enable_phoenix = "phoenix" in effective_providers or config_wants_phoenix
|
|
1689
1729
|
if should_enable_phoenix:
|
|
1690
1730
|
ensure_phoenix_instrumentation(settings, console=console, force=True)
|
|
1691
1731
|
|
|
@@ -2032,6 +2072,9 @@ def register_run_commands(
|
|
|
2032
2072
|
)
|
|
2033
2073
|
if prompt_bundle:
|
|
2034
2074
|
result.tracker_metadata["prompt_set"] = build_prompt_summary(prompt_bundle)
|
|
2075
|
+
result.tracker_metadata["prompt_set_detail"] = prompt_bundle.to_dict(
|
|
2076
|
+
include_content=True
|
|
2077
|
+
)
|
|
2035
2078
|
|
|
2036
2079
|
if retriever_instance or used_versioned_prefill:
|
|
2037
2080
|
retriever_tracker_meta: dict[str, Any] = {
|
|
@@ -2105,13 +2148,29 @@ def register_run_commands(
|
|
|
2105
2148
|
)
|
|
2106
2149
|
console.print(f"[dim]View datasets: {dataset_info.url}[/dim]")
|
|
2107
2150
|
except PhoenixSyncError as exc:
|
|
2151
|
+
if auto_phoenix_sync:
|
|
2152
|
+
print_cli_error(
|
|
2153
|
+
console,
|
|
2154
|
+
"Phoenix Dataset 업로드에 실패했습니다.",
|
|
2155
|
+
details=str(exc),
|
|
2156
|
+
)
|
|
2157
|
+
raise typer.Exit(2) from exc
|
|
2108
2158
|
print_cli_warning(
|
|
2109
2159
|
console,
|
|
2110
2160
|
"Phoenix Dataset 업로드에 실패했습니다.",
|
|
2111
2161
|
tips=[str(exc)],
|
|
2112
2162
|
)
|
|
2163
|
+
if auto_phoenix_sync and not phoenix_experiment:
|
|
2164
|
+
phoenix_experiment = f"{result.model_name}-{result.run_id[:8]}"
|
|
2113
2165
|
if phoenix_experiment:
|
|
2114
2166
|
if not phoenix_dataset_result:
|
|
2167
|
+
if auto_phoenix_sync:
|
|
2168
|
+
print_cli_error(
|
|
2169
|
+
console,
|
|
2170
|
+
"Dataset 업로드에 실패해 Phoenix Experiment 생성을 진행할 수 없습니다.",
|
|
2171
|
+
details="Phoenix dataset 업로드가 필요합니다.",
|
|
2172
|
+
)
|
|
2173
|
+
raise typer.Exit(2)
|
|
2115
2174
|
print_cli_warning(
|
|
2116
2175
|
console,
|
|
2117
2176
|
"Dataset 업로드에 실패해 Phoenix Experiment 생성을 건너뜁니다.",
|
|
@@ -2169,6 +2228,41 @@ def register_run_commands(
|
|
|
2169
2228
|
phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
|
|
2170
2229
|
phoenix_meta.setdefault("schema_version", 2)
|
|
2171
2230
|
phoenix_meta["prompts"] = prompt_metadata_entries
|
|
2231
|
+
if phoenix_sync_service and "phoenix" in effective_providers:
|
|
2232
|
+
try:
|
|
2233
|
+
prompt_set_summary = result.tracker_metadata.get("prompt_set") or {}
|
|
2234
|
+
prompt_set_name = prompt_set_summary.get("prompt_set_name")
|
|
2235
|
+
prompt_entries = list(prompt_metadata_entries)
|
|
2236
|
+
prompt_set_detail = result.tracker_metadata.get("prompt_set_detail")
|
|
2237
|
+
if isinstance(prompt_set_detail, dict):
|
|
2238
|
+
for item in prompt_set_detail.get("items", []):
|
|
2239
|
+
prompt = item.get("prompt") or {}
|
|
2240
|
+
if not isinstance(prompt, dict):
|
|
2241
|
+
continue
|
|
2242
|
+
prompt_entries.append(
|
|
2243
|
+
{
|
|
2244
|
+
"name": prompt.get("name"),
|
|
2245
|
+
"role": item.get("role"),
|
|
2246
|
+
"kind": prompt.get("kind"),
|
|
2247
|
+
"checksum": prompt.get("checksum"),
|
|
2248
|
+
"content": prompt.get("content"),
|
|
2249
|
+
"source": prompt.get("source"),
|
|
2250
|
+
}
|
|
2251
|
+
)
|
|
2252
|
+
synced = phoenix_sync_service.sync_prompts(
|
|
2253
|
+
prompt_entries=prompt_entries,
|
|
2254
|
+
model_name=result.model_name,
|
|
2255
|
+
model_provider=_infer_phoenix_model_provider(result.model_name),
|
|
2256
|
+
prompt_set_name=prompt_set_name,
|
|
2257
|
+
)
|
|
2258
|
+
if synced:
|
|
2259
|
+
phoenix_meta["prompts"] = synced
|
|
2260
|
+
except PhoenixSyncError as exc:
|
|
2261
|
+
print_cli_warning(
|
|
2262
|
+
console,
|
|
2263
|
+
"Phoenix Prompt 동기화에 실패했습니다.",
|
|
2264
|
+
tips=[str(exc)],
|
|
2265
|
+
)
|
|
2172
2266
|
|
|
2173
2267
|
if stage_events or stage_store:
|
|
2174
2268
|
stage_event_builder = StageEventBuilder()
|
|
@@ -2187,7 +2281,7 @@ def register_run_commands(
|
|
|
2187
2281
|
|
|
2188
2282
|
if effective_tracker != "none":
|
|
2189
2283
|
phoenix_opts = None
|
|
2190
|
-
if
|
|
2284
|
+
if "phoenix" in effective_providers:
|
|
2191
2285
|
phoenix_opts = {
|
|
2192
2286
|
"max_traces": phoenix_max_traces,
|
|
2193
2287
|
"metadata": phoenix_trace_metadata or None,
|
|
@@ -2198,7 +2292,7 @@ def register_run_commands(
|
|
|
2198
2292
|
verbose,
|
|
2199
2293
|
f"Tracker 로깅 시작 ({effective_tracker})",
|
|
2200
2294
|
)
|
|
2201
|
-
|
|
2295
|
+
_log_to_trackers(
|
|
2202
2296
|
settings,
|
|
2203
2297
|
result,
|
|
2204
2298
|
console,
|
|
@@ -2276,6 +2370,12 @@ def register_run_commands(
|
|
|
2276
2370
|
pipeline_result,
|
|
2277
2371
|
artifacts_dir=artifacts_dir,
|
|
2278
2372
|
)
|
|
2373
|
+
result.tracker_metadata["analysis_artifacts"] = {
|
|
2374
|
+
"dir": artifact_index.get("dir"),
|
|
2375
|
+
"index": artifact_index.get("index"),
|
|
2376
|
+
"output": str(analysis_output_path),
|
|
2377
|
+
"report": str(analysis_report_path),
|
|
2378
|
+
}
|
|
2279
2379
|
payload = serialize_pipeline_result(pipeline_result)
|
|
2280
2380
|
payload["run_id"] = result.run_id
|
|
2281
2381
|
payload["artifacts"] = artifact_index
|
|
@@ -2292,6 +2392,18 @@ def register_run_commands(
|
|
|
2292
2392
|
"[green]자동 분석 상세 결과 저장:[/green] "
|
|
2293
2393
|
f"{artifact_index['dir']} (index: {artifact_index['index']})\n"
|
|
2294
2394
|
)
|
|
2395
|
+
if effective_tracker != "none":
|
|
2396
|
+
_log_analysis_artifacts(
|
|
2397
|
+
settings,
|
|
2398
|
+
result,
|
|
2399
|
+
console,
|
|
2400
|
+
effective_tracker,
|
|
2401
|
+
analysis_payload=payload,
|
|
2402
|
+
artifact_index=artifact_index,
|
|
2403
|
+
report_text=report_text,
|
|
2404
|
+
output_path=analysis_output_path,
|
|
2405
|
+
report_path=analysis_report_path,
|
|
2406
|
+
)
|
|
2295
2407
|
|
|
2296
2408
|
@app.command(
|
|
2297
2409
|
name="run-simple",
|
|
@@ -2395,10 +2507,13 @@ def register_run_commands(
|
|
|
2395
2507
|
help="Store stage events in the SQLite database (requires --db).",
|
|
2396
2508
|
),
|
|
2397
2509
|
tracker: str = typer.Option(
|
|
2398
|
-
"
|
|
2510
|
+
"mlflow+phoenix",
|
|
2399
2511
|
"--tracker",
|
|
2400
2512
|
"-t",
|
|
2401
|
-
help=
|
|
2513
|
+
help=(
|
|
2514
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
2515
|
+
"or combinations like 'mlflow+phoenix'."
|
|
2516
|
+
),
|
|
2402
2517
|
),
|
|
2403
2518
|
langfuse: bool = typer.Option(
|
|
2404
2519
|
False,
|
|
@@ -2687,10 +2802,13 @@ def register_run_commands(
|
|
|
2687
2802
|
help="Store stage events in the SQLite database (requires --db).",
|
|
2688
2803
|
),
|
|
2689
2804
|
tracker: str = typer.Option(
|
|
2690
|
-
"
|
|
2805
|
+
"mlflow+phoenix",
|
|
2691
2806
|
"--tracker",
|
|
2692
2807
|
"-t",
|
|
2693
|
-
help=
|
|
2808
|
+
help=(
|
|
2809
|
+
"Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
|
|
2810
|
+
"or combinations like 'mlflow+phoenix'."
|
|
2811
|
+
),
|
|
2694
2812
|
),
|
|
2695
2813
|
langfuse: bool = typer.Option(
|
|
2696
2814
|
False,
|