pydantic-evals 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- pydantic_evals/dataset.py +4 -4
- pydantic_evals/reporting/__init__.py +30 -16
- {pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/METADATA +2 -2
- {pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/RECORD +6 -6
- {pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/WHEEL +0 -0
- {pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/licenses/LICENSE +0 -0
pydantic_evals/dataset.py
CHANGED
@@ -257,7 +257,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
         name: str | None = None,
         max_concurrency: int | None = None,
         progress: bool = True,
-    ) -> EvaluationReport:
+    ) -> EvaluationReport[InputsT, OutputT, MetadataT]:
         """Evaluates the test cases in the dataset using the given task.
 
         This method runs the task on each case in the dataset, applies evaluators,
@@ -312,7 +312,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
         name: str | None = None,
         max_concurrency: int | None = None,
         progress: bool = True,
-    ) -> EvaluationReport:
+    ) -> EvaluationReport[InputsT, OutputT, MetadataT]:
         """Evaluates the test cases in the dataset using the given task.
 
         This is a synchronous wrapper around [`evaluate`][pydantic_evals.Dataset.evaluate] provided for convenience.
@@ -877,7 +877,7 @@ async def _run_task_and_evaluators(
     case: Case[InputsT, OutputT, MetadataT],
    report_case_name: str,
    dataset_evaluators: list[Evaluator[InputsT, OutputT, MetadataT]],
-) -> ReportCase:
+) -> ReportCase[InputsT, OutputT, MetadataT]:
    """Run a task on a case and evaluate the results.
 
    Args:
@@ -927,7 +927,7 @@ async def _run_task_and_evaluators(
     span_id = f'{context.span_id:016x}'
     fallback_duration = time.time() - t0
 
-    return ReportCase(
+    return ReportCase[InputsT, OutputT, MetadataT](
         name=report_case_name,
         inputs=case.inputs,
         metadata=case.metadata,
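The dataset.py changes are purely type-level: `Dataset.evaluate`, `Dataset.evaluate_sync`, and the internal `_run_task_and_evaluators` helper now carry the dataset's `InputsT`, `OutputT`, and `MetadataT` parameters through to the returned report instead of erasing them to a bare `EvaluationReport`. A minimal sketch of what this buys a type-checked caller (the `greet` task and its types are invented for illustration; only `Case` and `Dataset` are real pydantic_evals names):

```python
from pydantic_evals import Case, Dataset

async def greet(name: str) -> str:  # hypothetical task
    return f'Hello, {name}!'

dataset = Dataset[str, str, dict](
    cases=[Case(name='example', inputs='World', expected_output='Hello, World!')],
)

# As of 0.4.0, evaluate_sync returns EvaluationReport[str, str, dict]
# rather than a bare EvaluationReport, so per-case fields are typed.
report = dataset.evaluate_sync(greet)
for case in report.cases:  # each case is a ReportCase[str, str, dict]
    print(case.name, case.inputs.upper())  # case.inputs checks as str, not Any
```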
pydantic_evals/reporting/__init__.py
CHANGED
@@ -2,14 +2,14 @@ from __future__ import annotations as _annotations
 
 from collections import defaultdict
 from collections.abc import Mapping
-from dataclasses import dataclass
+from dataclasses import dataclass
 from io import StringIO
-from typing import Any, Callable, Literal, Protocol
+from typing import Any, Callable, Generic, Literal, Protocol
 
-from pydantic import BaseModel
+from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from rich.table import Table
-from typing_extensions import TypedDict
+from typing_extensions import TypedDict, TypeVar
 
 from pydantic_evals._utils import UNSET, Unset
 
@@ -24,7 +24,9 @@ from .render_numbers import (
 
 __all__ = (
     'EvaluationReport',
+    'EvaluationReportAdapter',
     'ReportCase',
+    'ReportCaseAdapter',
     'EvaluationRenderer',
     'RenderValueConfig',
     'RenderNumberConfig',
@@ -35,27 +37,32 @@ MISSING_VALUE_STR = '[i]<missing>[/i]'
 EMPTY_CELL_STR = '-'
 EMPTY_AGGREGATE_CELL_STR = ''
 
+InputsT = TypeVar('InputsT', default=Any)
+OutputT = TypeVar('OutputT', default=Any)
+MetadataT = TypeVar('MetadataT', default=Any)
 
-class ReportCase(BaseModel):
+
+@dataclass
+class ReportCase(Generic[InputsT, OutputT, MetadataT]):
     """A single case in an evaluation report."""
 
     name: str
     """The name of the [case][pydantic_evals.Case]."""
-    inputs:
+    inputs: InputsT
     """The inputs to the task, from [`Case.inputs`][pydantic_evals.Case.inputs]."""
-    metadata:
+    metadata: MetadataT | None
     """Any metadata associated with the case, from [`Case.metadata`][pydantic_evals.Case.metadata]."""
-    expected_output:
+    expected_output: OutputT | None
     """The expected output of the task, from [`Case.expected_output`][pydantic_evals.Case.expected_output]."""
-    output:
+    output: OutputT
     """The output of the task execution."""
 
     metrics: dict[str, float | int]
     attributes: dict[str, Any]
 
-    scores: dict[str, EvaluationResult[int | float]]
-    labels: dict[str, EvaluationResult[str]]
-    assertions: dict[str, EvaluationResult[bool]]
+    scores: dict[str, EvaluationResult[int | float]]
+    labels: dict[str, EvaluationResult[str]]
+    assertions: dict[str, EvaluationResult[bool]]
 
     task_duration: float
     total_duration: float  # includes evaluator execution time
@@ -65,6 +72,9 @@ class ReportCase(BaseModel):
     span_id: str
 
 
+ReportCaseAdapter = TypeAdapter(ReportCase[Any, Any, Any])
+
+
 class ReportCaseAggregate(BaseModel):
     """A synthetic case that summarizes a set of cases."""
 
@@ -142,12 +152,13 @@ class ReportCaseAggregate(BaseModel):
     )
 
 
-class EvaluationReport(BaseModel):
+@dataclass
+class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
     """A report of the results of evaluating a model on a set of cases."""
 
     name: str
     """The name of the report."""
-    cases: list[ReportCase]
+    cases: list[ReportCase[InputsT, OutputT, MetadataT]]
     """The cases in the report."""
 
     def averages(self) -> ReportCaseAggregate:
@@ -156,7 +167,7 @@ class EvaluationReport(BaseModel):
     def print(
         self,
         width: int | None = None,
-        baseline: EvaluationReport | None = None,
+        baseline: EvaluationReport[InputsT, OutputT, MetadataT] | None = None,
         include_input: bool = False,
         include_metadata: bool = False,
         include_expected_output: bool = False,
@@ -199,7 +210,7 @@ class EvaluationReport(BaseModel):
 
     def console_table(
         self,
-        baseline: EvaluationReport | None = None,
+        baseline: EvaluationReport[InputsT, OutputT, MetadataT] | None = None,
         include_input: bool = False,
         include_metadata: bool = False,
         include_expected_output: bool = False,
@@ -250,6 +261,9 @@ class EvaluationReport(BaseModel):
         return io_file.getvalue()
 
 
+EvaluationReportAdapter = TypeAdapter(EvaluationReport[Any, Any, Any])
+
+
 class RenderValueConfig(TypedDict, total=False):
     """A configuration for rendering a values in an Evaluation report."""
 
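In reporting/__init__.py, `ReportCase` and `EvaluationReport` change from pydantic `BaseModel` subclasses into generic dataclasses, parametrized by `InputsT`, `OutputT`, and `MetadataT` with `default=Any` (via `typing_extensions.TypeVar` defaults), so existing un-parametrized annotations keep their old meaning. Since plain dataclasses have no `.model_dump_json()`, the module now exports `ReportCaseAdapter` and `EvaluationReportAdapter` as `TypeAdapter` instances. A sketch of how they might be used, continuing the example above (whether evaluator results round-trip losslessly is not shown by this diff):

```python
from pydantic_evals.reporting import EvaluationReportAdapter, ReportCaseAdapter

# Serialize the whole report, or a single case, via the new adapters.
report_json = EvaluationReportAdapter.dump_json(report, indent=2)  # -> bytes
case_dict = ReportCaseAdapter.dump_python(report.cases[0], mode='json')

# Validate back into the Any-parametrized forms the adapters were built with,
# i.e. EvaluationReport[Any, Any, Any] / ReportCase[Any, Any, Any].
restored = EvaluationReportAdapter.validate_json(report_json)
```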
{pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 0.3.7
+Version: 0.4.0
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -32,7 +32,7 @@ Requires-Python: >=3.9
 Requires-Dist: anyio>=0
 Requires-Dist: eval-type-backport>=0; python_version < '3.11'
 Requires-Dist: logfire-api>=1.2.0
-Requires-Dist: pydantic-ai-slim==0.3.7
+Requires-Dist: pydantic-ai-slim==0.4.0
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4
{pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 pydantic_evals/__init__.py,sha256=OKRbfhdc8UZPzrPJMZUQwvzIxLhXmEZxz1ZuD921fy4,839
 pydantic_evals/_utils.py,sha256=PfhmPbdQp-q90s568LuG45zDDXxgO13BEz8MQJK8qw4,2922
-pydantic_evals/dataset.py,sha256=…
+pydantic_evals/dataset.py,sha256=-wLreOfr7fsr2NqPHeVbrHh_dIlyjjTrY_QK4eBZFnw,46126
 pydantic_evals/generation.py,sha256=-w-4-zpJuW8mLj5ed60LUYm--b-2G42p-UDuPhOQgRE,3492
 pydantic_evals/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pydantic_evals/evaluators/__init__.py,sha256=uj110viFiDvqrIcuDcWexk_uBgJqhKMGPHT2YvDb7LA,624
@@ -15,9 +15,9 @@ pydantic_evals/otel/_context_in_memory_span_exporter.py,sha256=vIDF9-6lDuNKZuSM5
 pydantic_evals/otel/_context_subtree.py,sha256=Iazp4w3IIBMCrkqWL-hTG-2QG_-2X81p794WG9MAsGk,1175
 pydantic_evals/otel/_errors.py,sha256=aW1414eTofpA7R_DUgOeT-gj7YA6OXmm8Y4oYeFukD4,268
 pydantic_evals/otel/span_tree.py,sha256=LV5Hsyo4riJzevHyBz8wxP82S-ry5zeKYi9bKWjGCS8,23057
-pydantic_evals/reporting/__init__.py,sha256=…
+pydantic_evals/reporting/__init__.py,sha256=k_3tteqXGh0yGvgpN68gB0CjG9wzrakzDTve2GHend4,42148
 pydantic_evals/reporting/render_numbers.py,sha256=8SKlK3etbD7HnSWWHCE993ceCNLZCepVQ-SsqUIhyxk,6916
-pydantic_evals-0.3.7.dist-info/METADATA,sha256=…
-pydantic_evals-0.3.7.dist-info/WHEEL,sha256=…
-pydantic_evals-0.3.7.dist-info/licenses/LICENSE,sha256=…
-pydantic_evals-0.3.7.dist-info/RECORD,,
+pydantic_evals-0.4.0.dist-info/METADATA,sha256=Fj6Jpt6VisJsz97AID-AEzcpfRWPVuaocmKfVTmyaHY,7785
+pydantic_evals-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+pydantic_evals-0.4.0.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
+pydantic_evals-0.4.0.dist-info/RECORD,,
{pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/WHEEL
File without changes

{pydantic_evals-0.3.7.dist-info → pydantic_evals-0.4.0.dist-info}/licenses/LICENSE
File without changes