evalvault 1.75.0__py3-none-any.whl → 1.76.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +99 -63
- evalvault/adapters/inbound/api/routers/config.py +3 -1
- evalvault/adapters/inbound/cli/commands/method.py +2 -2
- evalvault/adapters/inbound/cli/commands/run.py +146 -28
- evalvault/adapters/inbound/cli/commands/run_helpers.py +157 -55
- evalvault/adapters/outbound/llm/factory.py +1 -1
- evalvault/adapters/outbound/phoenix/sync_service.py +99 -0
- evalvault/adapters/outbound/tracker/mlflow_adapter.py +209 -54
- evalvault/adapters/outbound/tracker/phoenix_adapter.py +158 -9
- evalvault/config/instrumentation.py +8 -6
- evalvault/config/phoenix_support.py +5 -0
- evalvault/config/settings.py +40 -4
- evalvault/domain/services/evaluator.py +2 -0
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/METADATA +1 -1
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/RECORD +18 -18
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/WHEEL +0 -0
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.75.0.dist-info → evalvault-1.76.0.dist-info}/licenses/LICENSE.md +0 -0

```diff
--- a/evalvault/adapters/inbound/cli/commands/run_helpers.py
+++ b/evalvault/adapters/inbound/cli/commands/run_helpers.py
@@ -6,7 +6,7 @@ import json
 from collections.abc import Callable, Sequence
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 import click
 import typer
```

```diff
@@ -25,7 +25,7 @@ from evalvault.config.phoenix_support import (
     instrumentation_span,
     set_span_attributes,
 )
-from evalvault.config.settings import Settings
+from evalvault.config.settings import Settings, resolve_tracker_providers
 from evalvault.domain.entities import (
     Dataset,
     EvaluationRun,
```

```diff
@@ -58,7 +58,7 @@ from evalvault.ports.outbound.tracker_port import TrackerPort
 from ..utils.console import print_cli_error, print_cli_warning
 from ..utils.formatters import format_score, format_status
 
-TrackerType =
+TrackerType = str
 apply_retriever_to_dataset = retriever_context.apply_retriever_to_dataset
 
 
```
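
The `TrackerType` alias is now a plain `str`, and `resolve_tracker_providers` is newly imported from `evalvault.config.settings`, which suggests the tracker option has become a free-form string that can carry a comma-separated provider list. A minimal sketch of what such a resolver could look like; the actual helper ships in `settings.py` and is not part of this excerpt:

```python
# Hypothetical sketch only -- the real resolve_tracker_providers lives in
# evalvault/config/settings.py and is not shown in this diff.
def resolve_tracker_providers(tracker_type: str | None) -> list[str]:
    """Split a tracker option such as "mlflow,phoenix" into provider names."""
    if not tracker_type:
        return []
    return [part.strip().lower() for part in tracker_type.split(",") if part.strip()]
```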

```diff
@@ -319,15 +319,22 @@ def _display_memory_insights(insights: dict[str, Any], console: Console) -> None
     console.print(Panel(panel_body, title="Domain Memory Insights", border_style="magenta"))
 
 
-def _get_tracker(
+def _get_tracker(
+    settings: Settings,
+    tracker_type: str,
+    console: Console,
+    *,
+    required: bool = False,
+) -> TrackerPort | None:
     """Get the appropriate tracker adapter based on type."""
     if tracker_type == "langfuse":
         if not settings.langfuse_public_key or not settings.langfuse_secret_key:
-
-
-
-
-
+            message = "Langfuse credentials are not configured."
+            tips = ["Add LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY to .env."]
+            if required:
+                print_cli_error(console, message, fixes=tips)
+                raise typer.Exit(2)
+            print_cli_warning(console, message + " Skipping logging.", tips=tips)
             return None
         from evalvault.adapters.outbound.tracker.langfuse_adapter import LangfuseAdapter
 
```
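
The rewritten signature adds a keyword-only `required` flag that switches `_get_tracker` between warn-and-skip and fail-fast handling of missing configuration. A sketch of the two call modes, with `settings` and `console` assumed to be in scope:

```python
# Optional mode: warn and continue without a tracker.
tracker = _get_tracker(settings, "langfuse", console)
# -> prints a CLI warning and returns None when credentials are missing

# Required mode: abort the command instead.
tracker = _get_tracker(settings, "langfuse", console, required=True)
# -> prints a CLI error and raises typer.Exit(2) when credentials are missing
```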

```diff
@@ -339,11 +346,12 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
 
     elif tracker_type == "mlflow":
         if not settings.mlflow_tracking_uri:
-
-
-
-
-
+            message = "The MLflow tracking URI is not configured."
+            tips = ["Set the MLFLOW_TRACKING_URI environment variable."]
+            if required:
+                print_cli_error(console, message, fixes=tips)
+                raise typer.Exit(2)
+            print_cli_warning(console, message + " Skipping logging.", tips=tips)
             return None
         try:
             from evalvault.adapters.outbound.tracker.mlflow_adapter import MLflowAdapter
```

```diff
@@ -353,11 +361,12 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
                 experiment_name=settings.mlflow_experiment_name,
             )
         except ImportError:
-
-
-
-
-
+            message = "The MLflow extra is not installed."
+            tips = ["Install the components with 'uv sync --extra mlflow'."]
+            if required:
+                print_cli_error(console, message, fixes=tips)
+                raise typer.Exit(2)
+            print_cli_warning(console, message, tips=tips)
             return None
 
     elif tracker_type == "phoenix":
```

```diff
@@ -367,13 +376,16 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
             return PhoenixAdapter(
                 endpoint=settings.phoenix_endpoint,
                 service_name="evalvault",
+                project_name=getattr(settings, "phoenix_project_name", None),
+                annotations_enabled=getattr(settings, "phoenix_annotations_enabled", True),
             )
         except ImportError:
-
-
-
-
-
+            message = "The Phoenix extra is not installed."
+            tips = ["Add the dependencies with 'uv sync --extra phoenix'."]
+            if required:
+                print_cli_error(console, message, fixes=tips)
+                raise typer.Exit(2)
+            print_cli_warning(console, message, tips=tips)
             return None
 
     else:
```

```diff
@@ -385,6 +397,22 @@ def _get_tracker(settings: Settings, tracker_type: str, console: Console) -> Tra
         return None
 
 
+def _resolve_tracker_list(tracker_type: str) -> list[str]:
+    providers = resolve_tracker_providers(tracker_type)
+    if not providers:
+        return []
+    if providers == ["none"]:
+        return ["none"]
+    supported = {"langfuse", "mlflow", "phoenix"}
+    unknown = [entry for entry in providers if entry not in supported]
+    if unknown:
+        raise ValueError(f"Unknown tracker provider(s): {', '.join(unknown)}")
+    required = {"mlflow", "phoenix"}
+    if not required.issubset(set(providers)):
+        raise ValueError("tracker must include both 'mlflow' and 'phoenix'")
+    return providers
+
+
 def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
     """Build a Phoenix UI URL for the given trace ID."""
 
```
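
Read literally, `_resolve_tracker_list` accepts the sentinel `none`, rejects unknown providers, and rejects any non-`none` selection that does not include both `mlflow` and `phoenix`. Illustrative outcomes, assuming `resolve_tracker_providers` simply splits the option on commas (that helper is not shown here):

```python
_resolve_tracker_list("none")            # ["none"] -- callers then skip logging
_resolve_tracker_list("mlflow,phoenix")  # ["mlflow", "phoenix"]
_resolve_tracker_list("mlflow,datadog")  # ValueError: Unknown tracker provider(s): datadog
_resolve_tracker_list("langfuse")        # ValueError: tracker must include both 'mlflow' and 'phoenix'
```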

```diff
@@ -395,7 +423,7 @@ def _build_phoenix_trace_url(endpoint: str, trace_id: str) -> str:
     return f"{base.rstrip('/')}/#/traces/{trace_id}"
 
 
-def
+def _log_to_trackers(
     settings: Settings,
     result,
     console: Console,
```

```diff
@@ -404,18 +432,39 @@ def _log_to_tracker(
     phoenix_options: dict[str, Any] | None = None,
     log_phoenix_traces_fn: Callable[..., int] | None = None,
 ) -> None:
-    """Log evaluation results to the specified tracker."""
-
-
+    """Log evaluation results to the specified tracker(s)."""
+    try:
+        tracker_types = _resolve_tracker_list(tracker_type)
+    except ValueError as exc:
+        print_cli_error(console, "The tracker configuration is invalid.", details=str(exc))
+        raise typer.Exit(2) from exc
+    if not tracker_types or tracker_types == ["none"]:
         return
 
-
-
-
-
-
-
-
+    result.tracker_metadata.setdefault("tracker_providers", tracker_types)
+    for provider in tracker_types:
+        tracker = _get_tracker(settings, provider, console, required=True)
+        if tracker is None:
+            raise typer.Exit(2)
+        tracker_name = provider.capitalize()
+        trace_id: str | None = None
+        with console.status(f"[bold green]Logging to {tracker_name}..."):
+            try:
+                trace_id = tracker.log_evaluation_run(result)
+                console.print(f"[green]Logged to {tracker_name}[/green] (trace_id: {trace_id})")
+            except Exception as exc:
+                print_cli_error(
+                    console,
+                    f"{tracker_name} logging failed.",
+                    details=str(exc),
+                )
+                raise typer.Exit(2) from exc
+
+        if trace_id:
+            provider_meta = result.tracker_metadata.setdefault(provider, {})
+            if isinstance(provider_meta, dict):
+                provider_meta.setdefault("trace_id", trace_id)
+        if provider == "phoenix":
             endpoint = getattr(settings, "phoenix_endpoint", "http://localhost:6006/v1/traces")
             if not isinstance(endpoint, str) or not endpoint:
                 endpoint = "http://localhost:6006/v1/traces"
```
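
`_log_to_trackers` now fans out over every resolved provider, aborts the command on the first failure, and records each returned trace ID under a per-provider key. The resulting `result.tracker_metadata` would look roughly like this (IDs are placeholders):

```python
{
    "tracker_providers": ["mlflow", "phoenix"],
    "mlflow": {"trace_id": "a3f2c9e1"},   # returned by MLflowAdapter.log_evaluation_run
    "phoenix": {"trace_id": "91bc44d0"},  # returned by PhoenixAdapter.log_evaluation_run
}
```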

```diff
@@ -431,27 +480,78 @@ def _log_to_tracker(
             trace_url = get_phoenix_trace_url(result.tracker_metadata)
             if trace_url:
                 console.print(f"[dim]Phoenix Trace: {trace_url}[/dim]")
-
-
-
-
-
+
+            options = phoenix_options or {}
+            log_traces = log_phoenix_traces_fn or log_phoenix_traces
+            extra = log_traces(
+                tracker,
+                result,
+                max_traces=options.get("max_traces"),
+                metadata=options.get("metadata"),
+            )
+            if extra:
+                console.print(
+                    f"[dim]Recorded {extra} Phoenix RAG trace(s) for detailed observability.[/dim]"
+                )
+
+
+def _log_analysis_artifacts(
+    settings: Settings,
+    result: EvaluationRun,
+    console: Console,
+    tracker_type: str,
+    *,
+    analysis_payload: dict[str, Any],
+    artifact_index: dict[str, Any],
+    report_text: str,
+    output_path: Path,
+    report_path: Path,
+) -> None:
+    """Log analysis artifacts to tracker(s) as a separate trace/run."""
+    try:
+        tracker_types = _resolve_tracker_list(tracker_type)
+    except ValueError as exc:
+        print_cli_error(console, "The tracker configuration is invalid.", details=str(exc))
+        raise typer.Exit(2) from exc
+    if not tracker_types or tracker_types == ["none"]:
+        return
+
+    metadata = {
+        "run_id": result.run_id,
+        "dataset_name": result.dataset_name,
+        "dataset_version": result.dataset_version,
+        "analysis_output": str(output_path),
+        "analysis_report": str(report_path),
+        "analysis_artifacts_dir": artifact_index.get("dir"),
+        "event_type": "analysis",
+    }
+
+    for provider in tracker_types:
+        tracker = _get_tracker(settings, provider, console, required=True)
+        if tracker is None:
+            raise typer.Exit(2)
+        trace_name = f"analysis-{result.run_id[:8]}"
+        try:
+            trace_id = tracker.start_trace(trace_name, metadata=metadata)
+            tracker.save_artifact(
+                trace_id, "analysis_payload", analysis_payload, artifact_type="json"
             )
-
-
-
-
-
-        extra = log_traces(
-            tracker,
-            result,
-            max_traces=options.get("max_traces"),
-            metadata=options.get("metadata"),
-        )
-        if extra:
+            tracker.save_artifact(
+                trace_id, "analysis_artifacts", artifact_index, artifact_type="json"
+            )
+            tracker.save_artifact(trace_id, "analysis_report", report_text, artifact_type="text")
+            tracker.end_trace(trace_id)
             console.print(
-                f"[
+                f"[green]Logged analysis artifacts to {provider.capitalize()}[/green] "
+                f"(trace_id: {trace_id})"
+            )
+        except Exception as exc:
+            print_cli_error(
+                console,
+                f"{provider.capitalize()} analysis logging failed.",
+                details=str(exc),
             )
+            raise typer.Exit(2) from exc
 
 
 def _save_to_db(
```
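
`_log_analysis_artifacts` drives each tracker through a start-trace / save-artifact / end-trace lifecycle. A minimal `Protocol` sketch of the TrackerPort surface exercised here, inferred from the call sites above (the real port in `evalvault/ports/outbound/tracker_port.py` may differ):

```python
from typing import Any, Protocol


class TrackerPort(Protocol):
    """Inferred from the calls in this diff; not the actual port definition."""

    def log_evaluation_run(self, run: Any) -> str: ...
    def start_trace(self, name: str, *, metadata: dict[str, Any] | None = None) -> str: ...
    def save_artifact(
        self, trace_id: str, name: str, payload: Any, *, artifact_type: str = "json"
    ) -> None: ...
    def end_trace(self, trace_id: str) -> None: ...
```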

```diff
@@ -1173,8 +1273,10 @@ def _collect_prompt_metadata(
             prompt_path=target,
             content=content,
         )
-
-
+        summary_dict = asdict(summary)
+        summary_dict["content_preview"] = _build_content_preview(content)
+        summary_dict["content"] = content
+        summaries.append(summary_dict)
 
     return summaries
 
```

```diff
--- a/evalvault/adapters/outbound/llm/factory.py
+++ b/evalvault/adapters/outbound/llm/factory.py
@@ -97,7 +97,7 @@ def _resolve_faithfulness_fallback_config(
 
 def _default_faithfulness_fallback_model(provider: str) -> str | None:
     if provider == "ollama":
-        return "
+        return "qwen3:8b"
     if provider == "vllm":
         return "gpt-oss-120b"
     return None
```

```diff
--- a/evalvault/adapters/outbound/phoenix/sync_service.py
+++ b/evalvault/adapters/outbound/phoenix/sync_service.py
@@ -104,6 +104,22 @@ class PhoenixSyncService:
                 dataset_description=description,
             )
         except Exception as exc:  # pragma: no cover - HTTP/serialization errors
+            message = str(exc)
+            if "already exists" in message:
+                existing = self._find_dataset_by_name(dataset_name)
+                if existing:
+                    dataset_obj = self._client.datasets.get_dataset(dataset=existing["id"])
+                    dataset_url = self._client.experiments.get_dataset_experiments_url(
+                        dataset_obj.id
+                    )
+                    return PhoenixDatasetInfo(
+                        dataset_id=dataset_obj.id,
+                        dataset_name=dataset_obj.name,
+                        dataset_version_id=dataset_obj.version_id,
+                        url=dataset_url,
+                        description=description,
+                        example_count=getattr(dataset_obj, "examples", None),
+                    )
             raise PhoenixSyncError(f"Dataset upload failed: {exc}") from exc
 
         dataset_url = self._client.experiments.get_dataset_experiments_url(phoenix_dataset.id)
```
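
This branch makes dataset upload idempotent: when Phoenix reports that the name already exists, the service resolves the existing dataset through the new `_find_dataset_by_name` helper (added in the next hunk) and returns its info instead of raising `PhoenixSyncError`. A hypothetical lookup, with `service` standing in for a constructed `PhoenixSyncService`:

```python
# Entries come straight from self._client.datasets.list() as plain dicts; any
# listing error degrades to None so the original upload error still surfaces.
existing = service._find_dataset_by_name("insurance-qa")  # name is illustrative
if existing is not None:
    print(existing["id"], existing["name"])
```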

```diff
@@ -173,6 +189,74 @@ class PhoenixSyncService:
             )
         return examples
 
+    def _find_dataset_by_name(self, dataset_name: str) -> dict[str, Any] | None:
+        try:
+            datasets = self._client.datasets.list()
+        except Exception:
+            return None
+        for entry in datasets:
+            if entry.get("name") == dataset_name:
+                return entry
+        return None
+
+    def sync_prompts(
+        self,
+        *,
+        prompt_entries: list[dict[str, Any]],
+        model_name: str,
+        model_provider: str,
+        prompt_set_name: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """Create prompt versions in Phoenix Prompt Management."""
+
+        if not prompt_entries:
+            return []
+
+        try:
+            from phoenix.client.resources.prompts import PromptVersion
+        except Exception as exc:  # pragma: no cover - optional dependency
+            raise PhoenixSyncError("Phoenix prompt client unavailable") from exc
+
+        synced: list[dict[str, Any]] = []
+        for index, entry in enumerate(prompt_entries, start=1):
+            name = entry.get("name") or entry.get("role") or f"prompt_{index}"
+            content = entry.get("content") or entry.get("content_preview") or ""
+            if not content:
+                continue
+            prompt_version = PromptVersion(
+                [{"role": "system", "content": content}],
+                model_name=model_name,
+                model_provider=model_provider,
+                template_format="NONE",
+            )
+            prompt_metadata = {
+                "kind": entry.get("kind"),
+                "role": entry.get("role"),
+                "checksum": entry.get("checksum"),
+                "status": entry.get("status"),
+                "source": entry.get("source") or entry.get("path"),
+                "order": index,
+            }
+            if prompt_set_name:
+                prompt_metadata["prompt_set"] = prompt_set_name
+            try:
+                version = self._client.prompts.create(
+                    version=prompt_version,
+                    name=name,
+                    prompt_description=entry.get("notes"),
+                    prompt_metadata=_as_serializable(prompt_metadata),
+                )
+                synced.append(
+                    {
+                        **entry,
+                        "phoenix_prompt_version_id": getattr(version, "id", None),
+                    }
+                )
+            except Exception as exc:  # pragma: no cover - HTTP errors
+                raise PhoenixSyncError(f"Prompt sync failed: {exc}") from exc
+
+        return synced
+
     def _build_input_payload(self, test_case: TestCase) -> dict[str, Any]:
         return {
             "question": test_case.question,
```
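
A hypothetical `sync_prompts` call, assuming entries shaped like the summary dicts built by `_collect_prompt_metadata` earlier in this diff (`sync_service` and the model values are placeholders):

```python
synced = sync_service.sync_prompts(
    prompt_entries=[
        {"name": "system_prompt", "role": "system", "content": "You are an evaluator..."},
    ],
    model_name="gpt-4o-mini",
    model_provider="openai",
    prompt_set_name="insurance-qa",
)
# Each synced entry is the input dict plus a "phoenix_prompt_version_id" key.
```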

```diff
@@ -258,6 +342,21 @@ def build_experiment_metadata(
         "total_test_cases": run.total_test_cases,
         "metrics": metrics,
     }
+    if run.results:
+        latencies = [r.latency_ms for r in run.results if r.latency_ms]
+        tokens = [r.tokens_used for r in run.results if r.tokens_used]
+        costs = [r.cost_usd for r in run.results if r.cost_usd is not None]
+        if latencies:
+            payload["avg_latency_ms"] = round(sum(latencies) / len(latencies), 2)
+        if tokens:
+            payload["avg_tokens"] = round(sum(tokens) / len(tokens), 2)
+        if costs:
+            payload["avg_cost_usd"] = round(sum(costs) / len(costs), 6)
+        if run.total_tokens:
+            payload["total_tokens"] = run.total_tokens
+        if run.total_cost_usd is not None:
+            payload["total_cost_usd"] = run.total_cost_usd
+        payload["error_rate"] = round(1 - run.pass_rate, 4)
     if reliability_snapshot:
         payload["reliability_snapshot"] = reliability_snapshot
     if dataset.metadata:
```
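
The new block enriches the experiment metadata with latency, token, and cost aggregates. A worked example of the arithmetic, with illustrative per-result values:

```python
latencies = [120.0, 80.0]   # avg_latency_ms = round(200.0 / 2, 2)  -> 100.0
tokens = [450, 550]         # avg_tokens     = round(1000 / 2, 2)   -> 500.0
costs = [0.0012, 0.0018]    # avg_cost_usd   = round(0.003 / 2, 6)  -> 0.0015
pass_rate = 0.75            # error_rate     = round(1 - 0.75, 4)   -> 0.25
```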
|