pydantic-evals 1.3.0.tar.gz → 1.4.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pydantic-evals has been flagged as potentially problematic by the registry.
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/PKG-INFO +2 -2
- pydantic_evals-1.4.0/pydantic_evals/__init__.py +16 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/llm_as_a_judge.py +1 -1
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/reporting/__init__.py +3 -3
- pydantic_evals-1.3.0/pydantic_evals/__init__.py +0 -19
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/.gitignore +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/LICENSE +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/README.md +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/_utils.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/dataset.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/__init__.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/_run_evaluator.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/common.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/context.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/evaluator.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/spec.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/generation.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/__init__.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/_context_in_memory_span_exporter.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/_context_subtree.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/_errors.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/span_tree.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/py.typed +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/reporting/render_numbers.py +0 -0
- {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pyproject.toml +0 -0
{pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 1.3.0
+Version: 1.4.0
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -30,7 +30,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Requires-Dist: anyio>=0
 Requires-Dist: logfire-api>=3.14.1
-Requires-Dist: pydantic-ai-slim==1.3.0
+Requires-Dist: pydantic-ai-slim==1.4.0
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4
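The only metadata changes are the version bump and the matching pin of `pydantic-ai-slim`. If you want to confirm which pair is installed in an environment, a small standard-library check (illustrative only, not part of this release):

```python
from importlib.metadata import version

# Both distributions should report the same version when installed from this release.
print(version('pydantic-evals'), version('pydantic-ai-slim'))
```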
pydantic_evals-1.4.0/pydantic_evals/__init__.py

@@ -0,0 +1,16 @@
+"""A toolkit for evaluating the execution of arbitrary "stochastic functions", such as LLM calls.
+
+This package provides functionality for:
+- Creating and loading test datasets with structured inputs and outputs
+- Evaluating model performance using various metrics and evaluators
+- Generating reports for evaluation results
+"""
+
+from .dataset import Case, Dataset, increment_eval_metric, set_eval_attribute
+
+__all__ = (
+    'Case',
+    'Dataset',
+    'increment_eval_metric',
+    'set_eval_attribute',
+)
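The net effect of the new `__init__.py` is that `increment_eval_metric` and `set_eval_attribute` are re-exported at the package root alongside `Case` and `Dataset`. A minimal sketch of how the new top-level imports might be used; the task function and case values are illustrative assumptions, not taken from this release:

```python
from pydantic_evals import Case, Dataset, increment_eval_metric, set_eval_attribute


async def classify(text: str) -> str:
    # Record an attribute and a numeric metric for the case currently being evaluated.
    set_eval_attribute('mode', 'sketch')
    increment_eval_metric('llm_calls', 1)
    return 'positive' if 'good' in text else 'negative'


dataset = Dataset(cases=[Case(name='simple', inputs='a good day', expected_output='positive')])
report = dataset.evaluate_sync(classify)
```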
{pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/llm_as_a_judge.py

@@ -201,7 +201,7 @@ async def judge_output_expected(
     ).output
 
 
-def set_default_judge_model(model: models.Model | models.KnownModelName) -> None:
+def set_default_judge_model(model: models.Model | models.KnownModelName) -> None:
     """Set the default model used for judging.
 
     This model is used if `None` is passed to the `model` argument of `judge_output` and `judge_input_output`.
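As the signature shows, `set_default_judge_model` accepts either a `Model` instance or a known model name string, and the default is used whenever `None` is passed as the judge model. A hedged sketch of setting it before running LLM-judge evaluators; the model name is an illustrative assumption:

```python
from pydantic_evals.evaluators.llm_as_a_judge import set_default_judge_model

# Any model name accepted by pydantic-ai's KnownModelName should work here;
# 'openai:gpt-4o' is only an illustrative choice.
set_default_judge_model('openai:gpt-4o')
```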
{pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/reporting/__init__.py

@@ -289,12 +289,12 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
         metric_configs: dict[str, RenderNumberConfig] | None = None,
         duration_config: RenderNumberConfig | None = None,
         include_reasons: bool = False,
-    ) -> None:
+    ) -> None:
         """Print this report to the console, optionally comparing it to a baseline report.
 
         If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
         """
-        if console is None:
+        if console is None:  # pragma: no branch
             console = Console(width=width)
 
         table = self.console_table(
@@ -318,7 +318,7 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
             include_reasons=include_reasons,
         )
         console.print(table)
-        if include_errors and self.failures:
+        if include_errors and self.failures:  # pragma: no cover
             failures_table = self.failures_table(
                 include_input=include_input,
                 include_metadata=include_metadata,
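The two `# pragma` comments only affect coverage measurement; the runtime behaviour of `EvaluationReport.print` is unchanged. For context, a hedged sketch of calling it with some of the keyword arguments visible in these hunks; the toy dataset and task are assumptions for illustration:

```python
from rich.console import Console

from pydantic_evals import Case, Dataset


async def add_one(x: int) -> int:
    return x + 1


dataset = Dataset(cases=[Case(name='one', inputs=1, expected_output=2)])
report = dataset.evaluate_sync(add_one)

report.print(
    console=Console(width=120),  # if omitted, print() builds its own Console(width=width)
    include_reasons=True,        # keyword visible in the signature hunk above
    include_input=True,
    include_metadata=True,
)
```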
pydantic_evals-1.3.0/pydantic_evals/__init__.py

@@ -1,19 +0,0 @@
-"""A toolkit for evaluating the execution of arbitrary "stochastic functions", such as LLM calls.
-
-This package provides functionality for:
-- Creating and loading test datasets with structured inputs and outputs
-- Evaluating model performance using various metrics and evaluators
-- Generating reports for evaluation results
-
-TODO(DavidM): Implement serialization of reports for later comparison, and add git hashes etc.
-Note: I made pydantic_ai.evals.reports.EvalReport a BaseModel specifically to make this easier
-TODO(DavidM): Add commit hash, timestamp, and other metadata to reports (like pytest-speed does), possibly in a dedicated struct
-TODO(DavidM): Implement a CLI with some pytest-like filtering API to make it easier to run only specific cases
-"""
-
-from .dataset import Case, Dataset
-
-__all__ = (
-    'Case',
-    'Dataset',
-)