evalvault 1.75.0__py3-none-any.whl → 1.77.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,7 +33,7 @@ from evalvault.adapters.outbound.phoenix.sync_service import (
33
33
  from evalvault.adapters.outbound.storage.factory import build_storage_adapter
34
34
  from evalvault.adapters.outbound.tracer.phoenix_tracer_adapter import PhoenixTracerAdapter
35
35
  from evalvault.config.phoenix_support import ensure_phoenix_instrumentation
36
- from evalvault.config.settings import Settings, apply_profile
36
+ from evalvault.config.settings import Settings, apply_profile, resolve_tracker_providers
37
37
  from evalvault.domain.entities.analysis_pipeline import AnalysisIntent
38
38
  from evalvault.domain.entities.multiturn import (
39
39
  MultiTurnConversationRecord,
@@ -86,7 +86,8 @@ from .run_helpers import (
86
86
  _display_results,
87
87
  _evaluate_streaming_run,
88
88
  _is_oss_open_model,
89
- _log_to_tracker,
89
+ _log_analysis_artifacts,
90
+ _log_to_trackers,
90
91
  _option_was_provided,
91
92
  _print_run_mode_banner,
92
93
  _resolve_thresholds,
@@ -178,6 +179,14 @@ def _log_duration(
178
179
  _log_timestamp(console, verbose, f"{message} ({elapsed:.2f}s)")
179
180
 
180
181
 
182
+ def _infer_phoenix_model_provider(model_name: str) -> str:
183
+ if not model_name:
184
+ return "OPENAI"
185
+ provider = model_name.split("/")[0].upper() if "/" in model_name else "OPENAI"
186
+ allowed = {"OPENAI", "AZURE_OPENAI", "ANTHROPIC", "GOOGLE", "DEEPSEEK", "XAI", "AWS", "OLLAMA"}
187
+ return provider if provider in allowed else "OPENAI"
188
+
189
+
181
190
  def register_run_commands(
182
191
  app: typer.Typer,
183
192
  console: Console,
@@ -358,10 +367,13 @@ def register_run_commands(
358
367
  help="Store stage events in the SQLite database (requires --db).",
359
368
  ),
360
369
  tracker: str = typer.Option(
361
- "none",
370
+ "mlflow+phoenix",
362
371
  "--tracker",
363
372
  "-t",
364
- help="Tracker to log results: 'langfuse', 'mlflow', 'phoenix', or 'none'.",
373
+ help=(
374
+ "Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
375
+ "or combinations like 'mlflow+phoenix'."
376
+ ),
365
377
  rich_help_panel="Simple mode preset",
366
378
  ),
367
379
  langfuse: bool = typer.Option(
@@ -667,13 +679,24 @@ def register_run_commands(
667
679
  tracker_override = _option_was_provided(ctx, "tracker") or langfuse
668
680
  selected_tracker = tracker
669
681
  if preset.default_tracker:
670
- if tracker_override and tracker != preset.default_tracker:
671
- print_cli_warning(
672
- console,
673
- f"Simple 모드는 tracker={preset.default_tracker}로 고정됩니다.",
674
- tips=["다른 Tracker 사용하려면 --mode full을 사용하세요."],
675
- )
676
- selected_tracker = preset.default_tracker
682
+ if tracker_override:
683
+ try:
684
+ providers = resolve_tracker_providers(tracker)
685
+ except ValueError as exc:
686
+ print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
687
+ raise typer.Exit(2) from exc
688
+ if providers == ["none"]:
689
+ selected_tracker = preset.default_tracker
690
+ elif preset.default_tracker not in providers:
691
+ print_cli_warning(
692
+ console,
693
+ f"Simple 모드는 tracker에 {preset.default_tracker}가 포함되어야 합니다.",
694
+ tips=["다른 Tracker를 사용하려면 --mode full을 사용하세요."],
695
+ )
696
+ providers.append(preset.default_tracker)
697
+ selected_tracker = "+".join(providers)
698
+ else:
699
+ selected_tracker = preset.default_tracker
677
700
  tracker = selected_tracker
678
701
 
679
702
  prompt_manifest_value = prompt_manifest
@@ -852,7 +875,7 @@ def register_run_commands(
852
875
  if profile_name:
853
876
  settings = apply_profile(settings, profile_name)
854
877
 
855
- if db_path is None:
878
+ if db_path is None and settings.db_backend == "sqlite":
856
879
  db_path = Path(settings.evalvault_db_path)
857
880
 
858
881
  excel_output: Path | None = None
@@ -1198,6 +1221,7 @@ def register_run_commands(
1198
1221
  for turn in conversation.turn_results:
1199
1222
  turn_results.append(turn)
1200
1223
  _save_multiturn_to_db(
1224
+ settings,
1201
1225
  db_path,
1202
1226
  run_record,
1203
1227
  conversation_records,
@@ -1646,10 +1670,29 @@ def register_run_commands(
1646
1670
  )
1647
1671
  raise typer.Exit(2) from exc
1648
1672
 
1673
+ effective_tracker = tracker
1674
+ if langfuse:
1675
+ effective_tracker = "langfuse"
1676
+ print_cli_warning(
1677
+ console,
1678
+ "--langfuse 플래그는 곧 제거됩니다.",
1679
+ tips=["대신 --tracker langfuse를 사용하세요."],
1680
+ )
1681
+
1682
+ try:
1683
+ effective_providers = resolve_tracker_providers(effective_tracker)
1684
+ except ValueError as exc:
1685
+ print_cli_error(console, "Tracker 설정이 올바르지 않습니다.", details=str(exc))
1686
+ raise typer.Exit(2) from exc
1687
+
1649
1688
  phoenix_dataset_name = phoenix_dataset
1650
1689
  if phoenix_experiment and not phoenix_dataset_name:
1651
1690
  phoenix_dataset_name = f"{ds.name}:{ds.version}"
1652
1691
 
1692
+ auto_phoenix_sync = "phoenix" in effective_providers and not stream
1693
+ if auto_phoenix_sync and not phoenix_dataset_name:
1694
+ phoenix_dataset_name = f"{ds.name}:{ds.version}"
1695
+
1653
1696
  phoenix_dataset_description_value = phoenix_dataset_description
1654
1697
  if phoenix_dataset_name and not phoenix_dataset_description_value:
1655
1698
  desc_source = ds.metadata.get("description") if isinstance(ds.metadata, dict) else None
@@ -1659,13 +1702,23 @@ def register_run_commands(
1659
1702
  phoenix_dataset_result: dict[str, Any] | None = None
1660
1703
  phoenix_experiment_result: dict[str, Any] | None = None
1661
1704
 
1662
- if phoenix_dataset_name or phoenix_experiment:
1705
+ if phoenix_dataset_name or phoenix_experiment or auto_phoenix_sync:
1663
1706
  try:
1707
+ phoenix_endpoint = getattr(settings, "phoenix_endpoint", None)
1708
+ if not isinstance(phoenix_endpoint, str) or not phoenix_endpoint.strip():
1709
+ phoenix_endpoint = "http://localhost:6006/v1/traces"
1664
1710
  phoenix_sync_service = PhoenixSyncService(
1665
- endpoint=settings.phoenix_endpoint,
1711
+ endpoint=phoenix_endpoint,
1666
1712
  api_token=getattr(settings, "phoenix_api_token", None),
1667
1713
  )
1668
1714
  except PhoenixSyncError as exc:
1715
+ if auto_phoenix_sync:
1716
+ print_cli_error(
1717
+ console,
1718
+ "Phoenix Sync 서비스를 초기화할 수 없습니다.",
1719
+ details=str(exc),
1720
+ )
1721
+ raise typer.Exit(2) from exc
1669
1722
  print_cli_warning(
1670
1723
  console,
1671
1724
  "Phoenix Sync 서비스를 초기화할 수 없습니다.",
@@ -1673,19 +1726,10 @@ def register_run_commands(
1673
1726
  )
1674
1727
  phoenix_sync_service = None
1675
1728
 
1676
- effective_tracker = tracker
1677
- if langfuse and tracker == "none" and not preset.default_tracker:
1678
- effective_tracker = "langfuse"
1679
- print_cli_warning(
1680
- console,
1681
- "--langfuse 플래그는 곧 제거됩니다.",
1682
- tips=["대신 --tracker langfuse를 사용하세요."],
1683
- )
1684
-
1685
1729
  config_wants_phoenix = getattr(settings, "phoenix_enabled", False)
1686
1730
  if not isinstance(config_wants_phoenix, bool):
1687
1731
  config_wants_phoenix = False
1688
- should_enable_phoenix = effective_tracker == "phoenix" or config_wants_phoenix
1732
+ should_enable_phoenix = "phoenix" in effective_providers or config_wants_phoenix
1689
1733
  if should_enable_phoenix:
1690
1734
  ensure_phoenix_instrumentation(settings, console=console, force=True)
1691
1735
 
@@ -2032,6 +2076,9 @@ def register_run_commands(
2032
2076
  )
2033
2077
  if prompt_bundle:
2034
2078
  result.tracker_metadata["prompt_set"] = build_prompt_summary(prompt_bundle)
2079
+ result.tracker_metadata["prompt_set_detail"] = prompt_bundle.to_dict(
2080
+ include_content=True
2081
+ )
2035
2082
 
2036
2083
  if retriever_instance or used_versioned_prefill:
2037
2084
  retriever_tracker_meta: dict[str, Any] = {
@@ -2105,13 +2152,29 @@ def register_run_commands(
2105
2152
  )
2106
2153
  console.print(f"[dim]View datasets: {dataset_info.url}[/dim]")
2107
2154
  except PhoenixSyncError as exc:
2155
+ if auto_phoenix_sync:
2156
+ print_cli_error(
2157
+ console,
2158
+ "Phoenix Dataset 업로드에 실패했습니다.",
2159
+ details=str(exc),
2160
+ )
2161
+ raise typer.Exit(2) from exc
2108
2162
  print_cli_warning(
2109
2163
  console,
2110
2164
  "Phoenix Dataset 업로드에 실패했습니다.",
2111
2165
  tips=[str(exc)],
2112
2166
  )
2167
+ if auto_phoenix_sync and not phoenix_experiment:
2168
+ phoenix_experiment = f"{result.model_name}-{result.run_id[:8]}"
2113
2169
  if phoenix_experiment:
2114
2170
  if not phoenix_dataset_result:
2171
+ if auto_phoenix_sync:
2172
+ print_cli_error(
2173
+ console,
2174
+ "Dataset 업로드에 실패해 Phoenix Experiment 생성을 진행할 수 없습니다.",
2175
+ details="Phoenix dataset 업로드가 필요합니다.",
2176
+ )
2177
+ raise typer.Exit(2)
2115
2178
  print_cli_warning(
2116
2179
  console,
2117
2180
  "Dataset 업로드에 실패해 Phoenix Experiment 생성을 건너뜁니다.",
@@ -2169,6 +2232,41 @@ def register_run_commands(
2169
2232
  phoenix_meta = result.tracker_metadata.setdefault("phoenix", {})
2170
2233
  phoenix_meta.setdefault("schema_version", 2)
2171
2234
  phoenix_meta["prompts"] = prompt_metadata_entries
2235
+ if phoenix_sync_service and "phoenix" in effective_providers:
2236
+ try:
2237
+ prompt_set_summary = result.tracker_metadata.get("prompt_set") or {}
2238
+ prompt_set_name = prompt_set_summary.get("prompt_set_name")
2239
+ prompt_entries = list(prompt_metadata_entries)
2240
+ prompt_set_detail = result.tracker_metadata.get("prompt_set_detail")
2241
+ if isinstance(prompt_set_detail, dict):
2242
+ for item in prompt_set_detail.get("items", []):
2243
+ prompt = item.get("prompt") or {}
2244
+ if not isinstance(prompt, dict):
2245
+ continue
2246
+ prompt_entries.append(
2247
+ {
2248
+ "name": prompt.get("name"),
2249
+ "role": item.get("role"),
2250
+ "kind": prompt.get("kind"),
2251
+ "checksum": prompt.get("checksum"),
2252
+ "content": prompt.get("content"),
2253
+ "source": prompt.get("source"),
2254
+ }
2255
+ )
2256
+ synced = phoenix_sync_service.sync_prompts(
2257
+ prompt_entries=prompt_entries,
2258
+ model_name=result.model_name,
2259
+ model_provider=_infer_phoenix_model_provider(result.model_name),
2260
+ prompt_set_name=prompt_set_name,
2261
+ )
2262
+ if synced:
2263
+ phoenix_meta["prompts"] = synced
2264
+ except PhoenixSyncError as exc:
2265
+ print_cli_warning(
2266
+ console,
2267
+ "Phoenix Prompt 동기화에 실패했습니다.",
2268
+ tips=[str(exc)],
2269
+ )
2172
2270
 
2173
2271
  if stage_events or stage_store:
2174
2272
  stage_event_builder = StageEventBuilder()
@@ -2187,7 +2285,7 @@ def register_run_commands(
2187
2285
 
2188
2286
  if effective_tracker != "none":
2189
2287
  phoenix_opts = None
2190
- if effective_tracker == "phoenix":
2288
+ if "phoenix" in effective_providers:
2191
2289
  phoenix_opts = {
2192
2290
  "max_traces": phoenix_max_traces,
2193
2291
  "metadata": phoenix_trace_metadata or None,
@@ -2198,7 +2296,7 @@ def register_run_commands(
2198
2296
  verbose,
2199
2297
  f"Tracker 로깅 시작 ({effective_tracker})",
2200
2298
  )
2201
- _log_to_tracker(
2299
+ _log_to_trackers(
2202
2300
  settings,
2203
2301
  result,
2204
2302
  console,
@@ -2210,6 +2308,7 @@ def register_run_commands(
2210
2308
  db_started_at = datetime.now()
2211
2309
  _log_timestamp(console, verbose, "DB 저장 시작")
2212
2310
  _save_to_db(
2311
+ settings,
2213
2312
  db_path,
2214
2313
  result,
2215
2314
  console,
@@ -2276,6 +2375,12 @@ def register_run_commands(
2276
2375
  pipeline_result,
2277
2376
  artifacts_dir=artifacts_dir,
2278
2377
  )
2378
+ result.tracker_metadata["analysis_artifacts"] = {
2379
+ "dir": artifact_index.get("dir"),
2380
+ "index": artifact_index.get("index"),
2381
+ "output": str(analysis_output_path),
2382
+ "report": str(analysis_report_path),
2383
+ }
2279
2384
  payload = serialize_pipeline_result(pipeline_result)
2280
2385
  payload["run_id"] = result.run_id
2281
2386
  payload["artifacts"] = artifact_index
@@ -2292,6 +2397,18 @@ def register_run_commands(
2292
2397
  "[green]자동 분석 상세 결과 저장:[/green] "
2293
2398
  f"{artifact_index['dir']} (index: {artifact_index['index']})\n"
2294
2399
  )
2400
+ if effective_tracker != "none":
2401
+ _log_analysis_artifacts(
2402
+ settings,
2403
+ result,
2404
+ console,
2405
+ effective_tracker,
2406
+ analysis_payload=payload,
2407
+ artifact_index=artifact_index,
2408
+ report_text=report_text,
2409
+ output_path=analysis_output_path,
2410
+ report_path=analysis_report_path,
2411
+ )
2295
2412
 
2296
2413
  @app.command(
2297
2414
  name="run-simple",
@@ -2395,10 +2512,13 @@ def register_run_commands(
2395
2512
  help="Store stage events in the SQLite database (requires --db).",
2396
2513
  ),
2397
2514
  tracker: str = typer.Option(
2398
- "none",
2515
+ "mlflow+phoenix",
2399
2516
  "--tracker",
2400
2517
  "-t",
2401
- help="Tracker to log results: 'langfuse', 'mlflow', 'phoenix', or 'none'.",
2518
+ help=(
2519
+ "Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
2520
+ "or combinations like 'mlflow+phoenix'."
2521
+ ),
2402
2522
  ),
2403
2523
  langfuse: bool = typer.Option(
2404
2524
  False,
@@ -2687,10 +2807,13 @@ def register_run_commands(
2687
2807
  help="Store stage events in the SQLite database (requires --db).",
2688
2808
  ),
2689
2809
  tracker: str = typer.Option(
2690
- "none",
2810
+ "mlflow+phoenix",
2691
2811
  "--tracker",
2692
2812
  "-t",
2693
- help="Tracker to log results: 'langfuse', 'mlflow', 'phoenix', or 'none'.",
2813
+ help=(
2814
+ "Tracker to log results: 'langfuse', 'mlflow', 'phoenix', 'none', "
2815
+ "or combinations like 'mlflow+phoenix'."
2816
+ ),
2694
2817
  ),
2695
2818
  langfuse: bool = typer.Option(
2696
2819
  False,