evalvault 1.75.0__py3-none-any.whl → 1.77.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -52,6 +52,8 @@ class PhoenixAdapter(TrackerPort):
         self,
         endpoint: str = "http://localhost:6006/v1/traces",
         service_name: str = "evalvault",
+        project_name: str | None = None,
+        annotations_enabled: bool = True,
     ):
         """Initialize Phoenix adapter with OpenTelemetry.
 
@@ -61,11 +63,14 @@ class PhoenixAdapter(TrackerPort):
         """
         self._endpoint = endpoint
         self._service_name = service_name
+        self._project_name = project_name
+        self._annotations_enabled = annotations_enabled
         self._tracer: Any | None = None
         self._tracer_provider: TracerProvider | None = None
         self._active_spans: dict[str, Any] = {}
         self._tracer_any: Any | None = None
         self._initialized = False
+        self._annotations_client: Any | None = None
 
     def _ensure_initialized(self) -> None:
         """Lazy initialization of OpenTelemetry tracer."""
@@ -96,7 +101,10 @@ class PhoenixAdapter(TrackerPort):
             return
 
         # Create resource with service name
-        resource = Resource.create({"service.name": self._service_name})
+        resource_attributes = {"service.name": self._service_name}
+        if self._project_name:
+            resource_attributes["project.name"] = self._project_name
+        resource = Resource.create(resource_attributes)
 
         # Create tracer provider
         self._tracer_provider = TracerProvider(resource=resource)
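Both additions are backwards compatible: project_name defaults to None (no project.name resource attribute, matching 1.75.0 output) and annotations_enabled defaults to True. A minimal construction sketch; the import path and project name here are illustrative, not taken from the package:

from evalvault.adapters.outbound.tracker import PhoenixAdapter  # hypothetical import path

adapter = PhoenixAdapter(
    endpoint="http://localhost:6006/v1/traces",
    service_name="evalvault",
    project_name="my-rag-eval",    # new in 1.77.0; emitted as the project.name resource attribute
    annotations_enabled=True,      # new in 1.77.0; False skips phoenix.client entirely
)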
@@ -123,6 +131,64 @@ class PhoenixAdapter(TrackerPort):
                 "Failed to initialize Phoenix tracer. Check endpoint configuration and dependencies."
             ) from e
 
+    def _phoenix_base_url(self) -> str:
+        if "/v1/traces" in self._endpoint:
+            return self._endpoint.split("/v1/traces")[0]
+        return self._endpoint.rstrip("/")
+
+    def _get_annotations_client(self) -> Any | None:
+        if not self._annotations_enabled:
+            return None
+        if self._annotations_client is not None:
+            return self._annotations_client
+        try:
+            from phoenix.client import Client
+        except Exception:
+            return None
+        self._annotations_client = Client(base_url=self._phoenix_base_url())
+        return self._annotations_client
+
+    def _annotate_span(
+        self,
+        *,
+        span: Any,
+        name: str,
+        label: str,
+        score: float | None = None,
+        explanation: str | None = None,
+    ) -> None:
+        client = self._get_annotations_client()
+        if client is None or span is None:
+            return
+        try:
+            from opentelemetry.trace import format_span_id
+
+            span_id = format_span_id(span.get_span_context().span_id)
+            spans_client = getattr(client, "spans", None)
+            add_span_annotation = (
+                getattr(spans_client, "add_span_annotation", None) if spans_client else None
+            )
+            if callable(add_span_annotation):
+                add_span_annotation(
+                    annotation_name=name,
+                    annotator_kind="CODE",
+                    span_id=span_id,
+                    label=label,
+                    score=score,
+                    explanation=explanation,
+                )
+                return
+            client.annotations.add_span_annotation(
+                annotation_name=name,
+                annotator_kind="CODE",
+                span_id=span_id,
+                label=label,
+                score=score,
+                explanation=explanation,
+            )
+        except Exception:
+            return
+
     def start_trace(self, name: str, metadata: dict[str, Any] | None = None) -> str:
         """Start a new trace.
 
@@ -287,6 +353,7 @@ class PhoenixAdapter(TrackerPort):
 
         # Calculate per-metric summary
         metric_summary = {}
+        total_count = len(run.results) if run.results else 0
        for metric_name in run.metrics_evaluated:
             passed_count = sum(
                 1
@@ -299,9 +366,9 @@ class PhoenixAdapter(TrackerPort):
                 "average_score": round(avg_score, 4) if avg_score else 0.0,
                 "threshold": threshold,
                 "passed": passed_count,
-                "failed": len(run.results) - passed_count,
-                "total": len(run.results),
-                "pass_rate": round(passed_count / len(run.results), 4) if run.results else 0.0,
+                "failed": total_count - passed_count,
+                "total": total_count,
+                "pass_rate": round(passed_count / total_count, 4) if total_count else 0.0,
             }
 
         # Start root trace
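Hoisting len(run.results) into total_count removes three repeated len() calls per metric and keeps the division guard in one place. A worked example of the summary arithmetic:

# 8 of 10 results pass a metric:
passed_count, total_count = 8, 10
failed = total_count - passed_count                                       # 2
pass_rate = round(passed_count / total_count, 4) if total_count else 0.0  # 0.8

# Empty run: total_count == 0, so the `if total_count` guard keeps
# pass_rate at 0.0 instead of dividing by zero.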
@@ -328,8 +395,17 @@ class PhoenixAdapter(TrackerPort):
 
         # Set evaluation-specific attributes
         span = self._active_spans[trace_id]
+        span.set_attribute("openinference.span.kind", "EVALUATOR")
         span.set_attribute("evaluation.metrics", json.dumps(run.metrics_evaluated))
         span.set_attribute("evaluation.thresholds", json.dumps(run.thresholds))
+        span.set_attribute("evaluation.status", "pass" if run.pass_rate >= 1.0 else "fail")
+        if run.tracker_metadata:
+            project_name = run.tracker_metadata.get("project_name")
+            if project_name:
+                span.set_attribute("project.name", project_name)
+            project_kind = run.tracker_metadata.get("evaluation_task") or "evaluation"
+            span.set_attribute("project.kind", project_kind)
+            span.set_attribute("project.status", "pass" if run.pass_rate >= 1.0 else "fail")
 
         # Log average scores for each metric
         for metric_name, summary in metric_summary.items():
@@ -369,6 +445,8 @@ class PhoenixAdapter(TrackerPort):
             },
             "metrics": metric_summary,
             "custom_metrics": (run.tracker_metadata or {}).get("custom_metric_snapshot"),
+            "prompt_metadata": (run.tracker_metadata or {}).get("phoenix", {}).get("prompts"),
+            "tracker_metadata": run.tracker_metadata,
             "test_cases": [
                 {
                     "test_case_id": result.test_case_id,
@@ -420,6 +498,23 @@ class PhoenixAdapter(TrackerPort):
             f"test-case-{result.test_case_id}",
             context=context,
         ) as span:
+            try:
+                from opentelemetry.trace import Status, StatusCode
+
+                span.set_status(Status(StatusCode.OK if result.all_passed else StatusCode.ERROR))
+            except Exception:
+                pass
+            span.set_attribute("openinference.span.kind", "EVALUATOR")
+            span.set_attribute("evaluation.status", "pass" if result.all_passed else "fail")
+            self._annotate_span(
+                span=span,
+                name="evaluation_result",
+                label="pass" if result.all_passed else "fail",
+                score=1.0 if result.all_passed else 0.0,
+                explanation="All metrics passed"
+                if result.all_passed
+                else "One or more metrics failed",
+            )
             # Input data
             safe_question = sanitize_text(result.question, max_chars=MAX_LOG_CHARS) or ""
             safe_answer = sanitize_text(result.answer, max_chars=MAX_LOG_CHARS) or ""
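This annotation call rides on the helpers added earlier: _phoenix_base_url() strips the OTLP path so phoenix.client talks to the REST root, and _annotate_span tries the newer client.spans.add_span_annotation entry point before falling back to client.annotations.add_span_annotation for older phoenix.client layouts. A sketch of the URL derivation and the effective call (span id illustrative):

# _phoenix_base_url():
assert "http://localhost:6006/v1/traces".split("/v1/traces")[0] == "http://localhost:6006"
assert "http://phoenix.internal:6006/".rstrip("/") == "http://phoenix.internal:6006"

# Effective annotation call for a passing test case (sketch):
# client.spans.add_span_annotation(
#     annotation_name="evaluation_result",
#     annotator_kind="CODE",
#     span_id="a1b2c3d4e5f60718",   # format_span_id() yields a 16-hex-char string
#     label="pass",
#     score=1.0,
#     explanation="All metrics passed",
# )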
@@ -439,6 +534,10 @@ class PhoenixAdapter(TrackerPort):
             # Metrics
             span.set_attribute("output.all_passed", result.all_passed)
             span.set_attribute("output.tokens_used", result.tokens_used)
+            if result.tokens_used:
+                span.set_attribute("llm.token_count.total", result.tokens_used)
+            if result.cost_usd is not None:
+                span.set_attribute("llm.cost.total", result.cost_usd)
 
             for metric in result.metrics:
                 span.set_attribute(f"metric.{metric.name}.score", metric.score)
@@ -486,6 +585,7 @@ class PhoenixAdapter(TrackerPort):
             )
             if result.latency_ms:
                 span.set_attribute("timing.latency_ms", result.latency_ms)
+                span.set_attribute("evaluation.latency_ms", result.latency_ms)
 
     def log_retrieval(
         self,
@@ -528,6 +628,13 @@ class PhoenixAdapter(TrackerPort):
         if tracer is None:
             raise RuntimeError("Phoenix tracer is not initialized")
         with tracer.start_span("retrieval", context=context) as span:
+            try:
+                from opentelemetry.trace import Status, StatusCode
+
+                span.set_status(Status(StatusCode.OK))
+            except Exception:
+                pass
+            span.set_attribute("openinference.span.kind", "RETRIEVER")
             # Set retrieval attributes
             for key, value in data.to_span_attributes().items():
                 span.set_attribute(key, value)
@@ -541,14 +648,24 @@ class PhoenixAdapter(TrackerPort):
 
             span.set_attribute("spec.version", "0.1")
             span.set_attribute("rag.module", "retrieve")
+            if data.retrieval_time_ms:
+                span.set_attribute("retrieval.latency_ms", data.retrieval_time_ms)
 
             documents_payload = _build_retrieval_payload(data.candidates)
             span.set_attribute("custom.retrieval.doc_count", len(documents_payload))
             if documents_payload:
                 span.set_attribute("retrieval.documents_json", serialize_json(documents_payload))
-                doc_ids = _extract_doc_ids(documents_payload)
-                if doc_ids:
-                    span.set_attribute("output.value", doc_ids)
+                previews = [
+                    item.get("content_preview")
+                    for item in documents_payload
+                    if item.get("content_preview")
+                ]
+                if previews:
+                    span.set_attribute("output.value", previews)
+                else:
+                    doc_ids = _extract_doc_ids(documents_payload)
+                    if doc_ids:
+                        span.set_attribute("output.value", doc_ids)
 
             # Log each retrieved document as an event
             for i, doc in enumerate(data.candidates):
@@ -615,10 +732,31 @@ class PhoenixAdapter(TrackerPort):
         if tracer is None:
             raise RuntimeError("Phoenix tracer is not initialized")
         with tracer.start_span("generation", context=context) as span:
+            try:
+                from opentelemetry.trace import Status, StatusCode
+
+                span.set_status(Status(StatusCode.OK))
+            except Exception:
+                pass
+            span.set_attribute("openinference.span.kind", "LLM")
             # Set generation attributes
             for key, value in data.to_span_attributes().items():
                 span.set_attribute(key, value)
 
+            if data.model:
+                span.set_attribute("llm.model_name", data.model)
+                provider = data.model.split("/")[0] if "/" in data.model else ""
+                if provider:
+                    span.set_attribute("llm.provider", provider)
+            if data.input_tokens:
+                span.set_attribute("llm.token_count.prompt", data.input_tokens)
+            if data.output_tokens:
+                span.set_attribute("llm.token_count.completion", data.output_tokens)
+            if data.total_tokens:
+                span.set_attribute("llm.token_count.total", data.total_tokens)
+            if data.cost_usd is not None:
+                span.set_attribute("llm.cost.total", data.cost_usd)
+
             # Set prompt/response (truncate if too long)
             prompt = sanitize_text(data.prompt, max_chars=MAX_LOG_CHARS) or ""
             response = sanitize_text(data.response, max_chars=MAX_LOG_CHARS) or ""
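The llm.provider attribute is inferred from a provider/model prefix when one exists; a quick sketch of that split (model strings illustrative):

for model in ("openai/gpt-4o-mini", "ollama/llama3.1", "gpt-4o-mini"):
    provider = model.split("/")[0] if "/" in model else ""
    print(model, "->", provider or "(llm.provider not set)")
# openai/gpt-4o-mini -> openai
# ollama/llama3.1 -> ollama
# gpt-4o-mini -> (llm.provider not set)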
@@ -637,6 +775,13 @@ class PhoenixAdapter(TrackerPort):
             safe_template = sanitize_text(data.prompt_template, max_chars=MAX_LOG_CHARS)
             if safe_template:
                 span.set_attribute("generation.prompt_template", safe_template)
+                span.set_attribute("llm.prompt_template.template", safe_template)
+                span.set_attribute("llm.prompt_template.version", "v1")
+            prompt_vars = data.metadata.get("prompt_variables") if data.metadata else None
+            if prompt_vars:
+                span.set_attribute(
+                    "llm.prompt_template.variables", json.dumps(prompt_vars, default=str)
+                )
 
     def log_rag_trace(self, data: RAGTraceData) -> str:
         """Log a full RAG trace (retrieval + generation) to Phoenix."""
@@ -660,6 +805,8 @@ class PhoenixAdapter(TrackerPort):
         span = self._active_spans[trace_id]
         should_end = True
 
+        span.set_attribute("openinference.span.kind", "CHAIN")
+
         for key, value in data.to_span_attributes().items():
             span.set_attribute(key, value)
 
@@ -667,11 +814,23 @@ class PhoenixAdapter(TrackerPort):
             self.log_retrieval(trace_id, data.retrieval)
         if data.generation:
             self.log_generation(trace_id, data.generation)
+        output_preview = ""
         if data.final_answer:
-            preview = sanitize_text(data.final_answer, max_chars=MAX_LOG_CHARS)
-            if preview:
-                span.set_attribute("rag.final_answer", preview)
-                span.set_attribute("output.value", preview)
+            output_preview = sanitize_text(data.final_answer, max_chars=MAX_LOG_CHARS)
+        if not output_preview and data.generation and data.generation.response:
+            output_preview = sanitize_text(data.generation.response, max_chars=MAX_LOG_CHARS)
+        if not output_preview and data.retrieval:
+            previews: list[str] = []
+            for doc in data.retrieval.candidates:
+                if not doc.content:
+                    continue
+                preview = sanitize_text(doc.content, max_chars=MAX_CONTEXT_CHARS)
+                if preview:
+                    previews.append(preview)
+            output_preview = "\n".join(previews[:3])
+        if output_preview:
+            span.set_attribute("rag.final_answer", output_preview)
+            span.set_attribute("output.value", output_preview)
 
         if safe_query:
             span.set_attribute("input.value", safe_query)
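output.value on the root span now falls back through three sources instead of requiring final_answer. The precedence, restated as a standalone sketch (names illustrative, not the adapter's code):

def pick_output_preview(final_answer: str, response: str, doc_previews: list[str]) -> str:
    # 1) explicit final answer, 2) generation response, 3) first three doc previews
    for candidate in (final_answer, response, "\n".join(doc_previews[:3])):
        if candidate:
            return candidate
    return ""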
@@ -697,7 +856,14 @@ def _build_retrieval_payload(
     payload: list[dict[str, Any]] = []
     for index, doc in enumerate(documents, start=1):
         doc_id = doc.chunk_id or doc.source or doc.metadata.get("doc_id") or f"doc_{index}"
-        item: dict[str, Any] = {"doc_id": doc_id, "score": doc.score}
+        preview = ""
+        if doc.content:
+            preview = sanitize_text(doc.content, max_chars=MAX_CONTEXT_CHARS)
+        item: dict[str, Any] = {
+            "doc_id": doc_id,
+            "score": doc.score,
+            "content_preview": preview,
+        }
         if doc.source:
             item["source"] = doc.source
         if doc.rerank_score is not None:
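Every payload entry now carries a content_preview key (an empty string when the document has no content), which is what the retrieval span's output.value prefers above. One illustrative entry, values invented for the sketch:

item = {
    "doc_id": "chunk-42",          # chunk_id -> source -> metadata["doc_id"] -> doc_N fallback
    "score": 0.87,
    "content_preview": "Policyholders may cancel within 30 days ...",  # capped at MAX_CONTEXT_CHARS
    "source": "terms.pdf",         # only present when doc.source is set
}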
@@ -26,6 +26,7 @@ _tracer_provider: TracerProvider | None = None
 def setup_phoenix_instrumentation(
     endpoint: str = "http://localhost:6006/v1/traces",
     service_name: str = "evalvault",
+    project_name: str | None = None,
     enable_langchain: bool = True,
     enable_openai: bool = True,
     sample_rate: float = 1.0,
@@ -73,12 +74,13 @@ def setup_phoenix_instrumentation(
         return None
 
     # Create resource with service name
-    resource = Resource.create(
-        {
-            "service.name": service_name,
-            "service.version": "0.1.0",
-        }
-    )
+    resource_attributes = {
+        "service.name": service_name,
+        "service.version": "0.1.0",
+    }
+    if project_name:
+        resource_attributes["project.name"] = project_name
+    resource = Resource.create(resource_attributes)
 
     # Clamp sample rate between 0 and 1
     ratio = max(0.0, min(sample_rate, 1.0))
@@ -59,10 +59,15 @@ def ensure_phoenix_instrumentation(
     if api_token:
         headers = {"api-key": api_token}
 
+    project_name = getattr(settings, "phoenix_project_name", None)
+    if project_name is not None and not isinstance(project_name, str):
+        project_name = None
+
     try:
         setup_phoenix_instrumentation(
             endpoint=endpoint,
             service_name="evalvault",
+            project_name=project_name,
             sample_rate=sample_rate,
             headers=headers,
         )
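The getattr/isinstance guard means a missing or non-string phoenix_project_name degrades to None instead of breaking resource creation. Calling the setup function directly looks roughly like this (the module path in the import is assumed, not shown in the diff):

from evalvault.observability import setup_phoenix_instrumentation  # hypothetical path

setup_phoenix_instrumentation(
    endpoint="http://localhost:6006/v1/traces",
    service_name="evalvault",
    project_name="my-rag-eval",  # lands in the OTel resource as project.name
    sample_rate=1.0,
)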
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import logging
+import socket
+import subprocess
+from dataclasses import dataclass
+from shutil import which
+from urllib.parse import urlparse
+
+from evalvault.config.settings import Settings, is_production_profile
+
+logger = logging.getLogger(__name__)
+
+_PHOENIX_CONTAINER = "evalvault-phoenix"
+
+
+@dataclass(frozen=True)
+class Endpoint:
+    host: str
+    port: int
+
+
+def _is_local_host(host: str | None) -> bool:
+    if not host:
+        return False
+    return host in {"localhost", "127.0.0.1", "0.0.0.0"}
+
+
+def _port_is_open(host: str, port: int) -> bool:
+    try:
+        with socket.create_connection((host, port), timeout=0.2):
+            return True
+    except OSError:
+        return False
+
+
+def _parse_http_endpoint(url: str | None, default_port: int) -> Endpoint | None:
+    if not url or not isinstance(url, str):
+        return None
+    parsed = urlparse(url)
+    if parsed.scheme and parsed.scheme not in {"http", "https"}:
+        return None
+    host = parsed.hostname or ""
+    port = parsed.port or default_port
+    if not host or port <= 0:
+        return None
+    return Endpoint(host=host, port=port)
+
+
+def _start_mlflow(port: int) -> bool:
+    if which("mlflow") is None:
+        logger.warning("MLflow CLI not found. Install with: uv sync --extra mlflow")
+        return False
+    try:
+        subprocess.Popen(
+            ["mlflow", "server", "--host", "0.0.0.0", "--port", str(port)],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        logger.info("Started MLflow server on port %s", port)
+        return True
+    except Exception as exc:  # pragma: no cover - safety net
+        logger.warning("Failed to start MLflow server: %s", exc)
+        return False
+
+
+def _start_phoenix(port: int) -> bool:
+    if which("docker") is None:
+        logger.warning("Docker not found. Phoenix auto-start skipped.")
+        return False
+    try:
+        start = subprocess.run(
+            ["docker", "start", _PHOENIX_CONTAINER],
+            check=False,
+            capture_output=True,
+            text=True,
+        )
+        if start.returncode != 0:
+            subprocess.run(
+                [
+                    "docker",
+                    "run",
+                    "-d",
+                    "-p",
+                    f"{port}:6006",
+                    "--name",
+                    _PHOENIX_CONTAINER,
+                    "arizephoenix/phoenix:latest",
+                ],
+                check=False,
+                capture_output=True,
+                text=True,
+            )
+        logger.info("Ensured Phoenix container is running on port %s", port)
+        return True
+    except Exception as exc:  # pragma: no cover - safety net
+        logger.warning("Failed to start Phoenix container: %s", exc)
+        return False
+
+
+def ensure_local_observability(settings: Settings) -> None:
+    if is_production_profile(settings.evalvault_profile):
+        return
+
+    phoenix_endpoint = _parse_http_endpoint(
+        getattr(settings, "phoenix_endpoint", None) or "http://localhost:6006/v1/traces",
+        6006,
+    )
+    if (
+        phoenix_endpoint
+        and _is_local_host(phoenix_endpoint.host)
+        and not _port_is_open(phoenix_endpoint.host, phoenix_endpoint.port)
+    ):
+        _start_phoenix(phoenix_endpoint.port)
+
+    mlflow_endpoint = _parse_http_endpoint(getattr(settings, "mlflow_tracking_uri", None), 5000)
+    if (
+        mlflow_endpoint
+        and _is_local_host(mlflow_endpoint.host)
+        and not _port_is_open(mlflow_endpoint.host, mlflow_endpoint.port)
+    ):
+        _start_mlflow(mlflow_endpoint.port)
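This new module only acts on local hosts with closed ports, and only outside production profiles, so it appears intended to be safe to call unconditionally at startup. A sketch of the call site (Settings construction simplified; the import path for ensure_local_observability is not shown in the diff):

from evalvault.config.settings import Settings

settings = Settings()  # phoenix_endpoint defaults to http://localhost:6006/v1/traces
ensure_local_observability(settings)
# - parses the endpoint into Endpoint(host="localhost", port=6006)
# - if nothing is listening, tries `docker start evalvault-phoenix`, falling back to
#   `docker run -d -p 6006:6006 --name evalvault-phoenix arizephoenix/phoenix:latest`
# - then probes MLflow at mlflow_tracking_uri (default port 5000) the same way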
@@ -55,6 +55,33 @@ def _parse_cors_origins(cors_origins: str | None) -> list[str]:
     return [origin.strip() for origin in cors_origins.split(",") if origin.strip()]
 
 
+def resolve_tracker_providers(provider: str | None) -> list[str]:
+    if not provider:
+        return []
+    normalized = provider.strip().lower()
+    if normalized in {"none", "off", "disabled"}:
+        return ["none"]
+    aliases = {
+        "all": ["mlflow", "phoenix"],
+        "default": ["mlflow", "phoenix"],
+    }
+    if normalized in aliases:
+        return aliases[normalized]
+    separators = [",", "+", "/", "|"]
+    for sep in separators:
+        normalized = normalized.replace(sep, ",")
+    providers = [p.strip() for p in normalized.split(",") if p.strip()]
+    if not providers:
+        return []
+    if "none" in providers and len(providers) > 1:
+        raise ValueError("tracker_provider cannot combine 'none' with other providers")
+    deduped: list[str] = []
+    for entry in providers:
+        if entry not in deduped:
+            deduped.append(entry)
+    return deduped
+
+
 SECRET_REFERENCE_FIELDS = (
     "api_auth_tokens",
     "knowledge_read_tokens",
@@ -83,13 +110,14 @@ def _validate_production_settings(settings: "Settings") -> None:
     if settings.llm_provider == "openai" and not settings.openai_api_key:
         missing.append("OPENAI_API_KEY")
 
-    if settings.tracker_provider == "langfuse":
+    providers = resolve_tracker_providers(settings.tracker_provider)
+    if "langfuse" in providers:
         if not settings.langfuse_public_key:
             missing.append("LANGFUSE_PUBLIC_KEY")
         if not settings.langfuse_secret_key:
             missing.append("LANGFUSE_SECRET_KEY")
 
-    if settings.tracker_provider == "mlflow" and not settings.mlflow_tracking_uri:
+    if "mlflow" in providers and not settings.mlflow_tracking_uri:
         missing.append("MLFLOW_TRACKING_URI")
 
     if (
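resolve_tracker_providers normalizes the separators and aliases before the production checks above consume the result; worked examples that follow directly from the code:

assert resolve_tracker_providers("mlflow+phoenix") == ["mlflow", "phoenix"]
assert resolve_tracker_providers("Phoenix, MLflow") == ["phoenix", "mlflow"]
assert resolve_tracker_providers("all") == ["mlflow", "phoenix"]
assert resolve_tracker_providers("phoenix|phoenix") == ["phoenix"]  # deduped, order kept
assert resolve_tracker_providers("off") == ["none"]
assert resolve_tracker_providers(None) == []
# resolve_tracker_providers("none+mlflow") raises ValueError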
@@ -355,6 +383,14 @@ class Settings(BaseSettings):
         default="http://localhost:6006/v1/traces",
         description="Phoenix OTLP endpoint for traces",
     )
+    phoenix_project_name: str = Field(
+        default="evalvault",
+        description="Phoenix project name for grouping traces",
+    )
+    phoenix_annotations_enabled: bool = Field(
+        default=True,
+        description="Enable automatic Phoenix span annotations",
+    )
     phoenix_api_token: str | None = Field(
         default=None,
         description="Phoenix API token for cloud deployments (optional)",
@@ -372,8 +408,8 @@ class Settings(BaseSettings):
 
     # Tracker Provider Selection
     tracker_provider: str = Field(
-        default="langfuse",
-        description="Tracker provider: 'langfuse', 'mlflow', or 'phoenix'",
+        default="mlflow+phoenix",
+        description="Tracker provider: 'langfuse', 'mlflow', 'phoenix', 'none', or combinations",
     )
 
     # Cluster map configuration
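Since Settings is a pydantic BaseSettings, the new default remains overridable per deployment (via constructor kwargs or the usual environment mapping), and the validation hunk above only demands Langfuse keys or MLFLOW_TRACKING_URI when the corresponding provider actually resolves. Sketch:

settings = Settings(tracker_provider="phoenix")
assert resolve_tracker_providers(settings.tracker_provider) == ["phoenix"]
# "mlflow" not in providers -> MLFLOW_TRACKING_URI is not required in production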
@@ -1934,6 +1934,8 @@ class RagasEvaluator:
 
     def _calculate_cost(self, model_name: str, prompt_tokens: int, completion_tokens: int) -> float:
         """Calculate estimated cost in USD based on model pricing."""
+        if "ollama" in model_name:
+            return 0.0
         # Find matching model key (exact or substring match)
         price_key = "openai/gpt-4o"  # Default fallback
         for key in self.MODEL_PRICING:
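A small guard with a visible effect on reported run costs: any model identifier containing "ollama" is priced at zero instead of falling through to the substring match and its openai/gpt-4o fallback. Sketch, assuming an evaluator instance:

assert evaluator._calculate_cost("ollama/llama3.1", 1000, 500) == 0.0
# Other models still go through the MODEL_PRICING substring match,
# defaulting to the "openai/gpt-4o" price key when nothing matches.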
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evalvault
-Version: 1.75.0
+Version: 1.77.0
 Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
 Project-URL: Homepage, https://github.com/ntts9990/EvalVault
 Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
@@ -66,6 +66,7 @@ Requires-Dist: ijson>=3.3.0; extra == 'dev'
 Requires-Dist: kiwipiepy>=0.18.0; extra == 'dev'
 Requires-Dist: langchain-anthropic; extra == 'dev'
 Requires-Dist: lm-eval[api]>=0.4.0; extra == 'dev'
+Requires-Dist: manim>=0.18.0; extra == 'dev'
 Requires-Dist: mkdocs-material>=9.5.0; extra == 'dev'
 Requires-Dist: mkdocs>=1.5.0; extra == 'dev'
 Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'dev'