pydantic-evals 1.2.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

pydantic_evals/__init__.py CHANGED
@@ -4,16 +4,13 @@ This package provides functionality for:
 - Creating and loading test datasets with structured inputs and outputs
 - Evaluating model performance using various metrics and evaluators
 - Generating reports for evaluation results
-
-TODO(DavidM): Implement serialization of reports for later comparison, and add git hashes etc.
-  Note: I made pydantic_ai.evals.reports.EvalReport a BaseModel specifically to make this easier
-TODO(DavidM): Add commit hash, timestamp, and other metadata to reports (like pytest-speed does), possibly in a dedicated struct
-TODO(DavidM): Implement a CLI with some pytest-like filtering API to make it easier to run only specific cases
 """

-from .dataset import Case, Dataset
+from .dataset import Case, Dataset, increment_eval_metric, set_eval_attribute

 __all__ = (
     'Case',
     'Dataset',
+    'increment_eval_metric',
+    'set_eval_attribute',
 )
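The two new exports are context-aware helpers that a task function can call while it is being evaluated, attaching extra data to the current case. A minimal sketch of how they might be used, assuming the helpers are no-ops when no evaluation is active; the task and dataset below are illustrative, not part of the package:

    from pydantic_evals import Case, Dataset, increment_eval_metric, set_eval_attribute

    async def uppercase(inputs: str) -> str:
        # Inside an evaluation these calls record onto the current case's
        # results; outside one they are assumed to do nothing, so the task
        # remains usable on its own.
        set_eval_attribute('input_length', len(inputs))  # illustrative attribute
        increment_eval_metric('invocations', 1)          # illustrative metric
        return inputs.upper()

    dataset = Dataset(cases=[Case(name='upper', inputs='hello', expected_output='HELLO')])
    report = dataset.evaluate_sync(uppercase)
    report.print()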
pydantic_evals/dataset.py CHANGED
@@ -343,6 +343,8 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
             trace_id=trace_id,
         )
         if (averages := report.averages()) is not None and averages.assertions is not None:
+            experiment_metadata = {'n_cases': len(self.cases), 'averages': averages}
+            eval_span.set_attribute('logfire.experiment.metadata', experiment_metadata)
             eval_span.set_attribute('assertion_pass_rate', averages.assertions)
         return report
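Here eval_span is the root span created around the whole evaluation run, so a tracing backend now receives the case count and the report's aggregate row in a single attribute alongside the existing assertion_pass_rate. A hedged sketch of inspecting it with the standard OpenTelemetry in-memory exporter; the wiring below is plain OTel SDK, not pydantic-evals API, and it assumes the evaluation's spans are routed through the globally installed tracer provider:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

    # Collect finished spans in memory so their attributes can be inspected.
    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    # ... run dataset.evaluate_sync(task) here ...

    for span in exporter.get_finished_spans():
        metadata = span.attributes.get('logfire.experiment.metadata')
        if metadata is not None:
            # Depending on the exporter, the dict may arrive JSON-serialized.
            print(metadata)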
pydantic_evals/evaluators/llm_as_a_judge.py CHANGED
@@ -201,7 +201,7 @@ async def judge_output_expected(
     ).output


-def set_default_judge_model(model: models.Model | models.KnownModelName) -> None:  # pragma: no cover
+def set_default_judge_model(model: models.Model | models.KnownModelName) -> None:
     """Set the default model used for judging.

     This model is used if `None` is passed to the `model` argument of `judge_output` and `judge_input_output`.
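Dropping the pragma means set_default_judge_model is now exercised by the test suite; its behavior is unchanged. A short usage sketch, with an illustrative model name:

    from pydantic_evals.evaluators.llm_as_a_judge import set_default_judge_model

    # Subsequent judge_output / judge_input_output calls that leave their
    # model argument as None will use this model instead of the built-in default.
    set_default_judge_model('openai:gpt-4o')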
pydantic_evals/reporting/__init__.py CHANGED
@@ -289,12 +289,12 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
         metric_configs: dict[str, RenderNumberConfig] | None = None,
         duration_config: RenderNumberConfig | None = None,
         include_reasons: bool = False,
-    ) -> None:  # pragma: no cover
+    ) -> None:
         """Print this report to the console, optionally comparing it to a baseline report.

         If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
         """
-        if console is None:
+        if console is None:  # pragma: no branch
             console = Console(width=width)

         table = self.console_table(
@@ -318,7 +318,7 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
             include_reasons=include_reasons,
         )
         console.print(table)
-        if include_errors and self.failures:
+        if include_errors and self.failures:  # pragma: no cover
             failures_table = self.failures_table(
                 include_input=include_input,
                 include_metadata=include_metadata,
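The pragma shuffle tracks which paths of EvaluationReport.print the tests now reach: the default-Console branch is always taken in the suite, while the failures table renders only when a run actually produced failures. Passing an explicit console bypasses the default construction entirely; a sketch, assuming report came from a prior Dataset.evaluate_sync call:

    from rich.console import Console

    # Render onto an explicit console, e.g. to pin the width in CI logs
    # or to capture the output with Console(record=True).
    console = Console(width=120)
    report.print(console=console, include_input=True, include_metadata=True)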
pydantic_evals-1.2.1.dist-info/METADATA → pydantic_evals-1.4.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 1.2.1
+Version: 1.4.0
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -30,7 +30,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Requires-Dist: anyio>=0
 Requires-Dist: logfire-api>=3.14.1
-Requires-Dist: pydantic-ai-slim==1.2.1
+Requires-Dist: pydantic-ai-slim==1.4.0
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4
pydantic_evals-1.2.1.dist-info/RECORD → pydantic_evals-1.4.0.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
-pydantic_evals/__init__.py,sha256=OKRbfhdc8UZPzrPJMZUQwvzIxLhXmEZxz1ZuD921fy4,839
+pydantic_evals/__init__.py,sha256=X5m0fcEZ4e8hVhToX5PludEp8t7NTBmdNInFFM5hM_I,504
 pydantic_evals/_utils.py,sha256=1muGTc2zqjwxqngz6quRSLoZM88onjp0Xgt-a9n2aPQ,4111
-pydantic_evals/dataset.py,sha256=hX9wrBvbWha1RLomaBY_mzKudWWKMT9doj8VPH8NflU,50437
+pydantic_evals/dataset.py,sha256=XobDGjjTj0oR5CARw8sWwC0KrIg0tpRzRiOkg8-Eeyc,50618
 pydantic_evals/generation.py,sha256=Qy03z7vGvE14cUBsqjorEx7Ar1KkR7Fb5SItZB429fc,3715
 pydantic_evals/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pydantic_evals/evaluators/__init__.py,sha256=E_JT6o96Ef-oS_IZ1Hyy95NRLwz7EOHewp-o13IdXEM,1032
@@ -8,16 +8,16 @@ pydantic_evals/evaluators/_run_evaluator.py,sha256=uGmH67gCTeF9BSprCiBC4DtKEpKLr
 pydantic_evals/evaluators/common.py,sha256=Cc9RMsSf5P2gcq3IDwmZxgfo1xnu7HEehiAS2Hgibz4,11609
 pydantic_evals/evaluators/context.py,sha256=mTxcm0Hvkev9htpqwoJMCJIqEYBtY5g86SXcjoqQxHY,3884
 pydantic_evals/evaluators/evaluator.py,sha256=ylfKRytoM9KzbZkSsFkEEnsg4XhK4usuyy1Rb1emoPo,11474
-pydantic_evals/evaluators/llm_as_a_judge.py,sha256=BPdUfEsLPSxN2kJPt3dtJBRCBP46ctRoW_n24WubaB0,9567
+pydantic_evals/evaluators/llm_as_a_judge.py,sha256=4jAg-pAk7Ae5IFO1p3dar4Ncju__S6IORcH9LnU1fXs,9547
 pydantic_evals/evaluators/spec.py,sha256=szAUsY4gb8KK_l1R81HYrByh4Rawrjav7w9835FZg1w,6690
 pydantic_evals/otel/__init__.py,sha256=i2p3vDrOW039N4XM-UkozDhCm0ZmE6ZSs1yV5t03vd0,117
 pydantic_evals/otel/_context_in_memory_span_exporter.py,sha256=FrG0pXKjuvTp3bXNdrUyzdPkqm0DQWe4ehkiHaxSvz4,6742
 pydantic_evals/otel/_context_subtree.py,sha256=Iazp4w3IIBMCrkqWL-hTG-2QG_-2X81p794WG9MAsGk,1175
 pydantic_evals/otel/_errors.py,sha256=aW1414eTofpA7R_DUgOeT-gj7YA6OXmm8Y4oYeFukD4,268
 pydantic_evals/otel/span_tree.py,sha256=RzX4VGpEqc2QUhkyxMTXtBRo5yHHO1c0hI7QJJuiXPU,23043
-pydantic_evals/reporting/__init__.py,sha256=702W2BjMiXhKQz6T4sor6Zi2SjYTDQypCvealJrwTFA,54067
+pydantic_evals/reporting/__init__.py,sha256=LGPZRKyRAl7Apx44-UnYENsAltknakf3dcYkjwoTSFw,54088
 pydantic_evals/reporting/render_numbers.py,sha256=8SKlK3etbD7HnSWWHCE993ceCNLZCepVQ-SsqUIhyxk,6916
-pydantic_evals-1.2.1.dist-info/METADATA,sha256=jsE9ujRLvxt780e7as5aSMUfcE8Ns1VyclE0f9-spnE,7844
-pydantic_evals-1.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-pydantic_evals-1.2.1.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
-pydantic_evals-1.2.1.dist-info/RECORD,,
+pydantic_evals-1.4.0.dist-info/METADATA,sha256=ErkaNhP07TA5vw0L7kvaGj4ZsFJ6kBzsVCcUJA9g95s,7844
+pydantic_evals-1.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+pydantic_evals-1.4.0.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
+pydantic_evals-1.4.0.dist-info/RECORD,,