pydantic-evals 1.3.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (25)
  1. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/PKG-INFO +2 -2
  2. pydantic_evals-1.4.0/pydantic_evals/__init__.py +16 -0
  3. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/llm_as_a_judge.py +1 -1
  4. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/reporting/__init__.py +3 -3
  5. pydantic_evals-1.3.0/pydantic_evals/__init__.py +0 -19
  6. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/.gitignore +0 -0
  7. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/LICENSE +0 -0
  8. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/README.md +0 -0
  9. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/_utils.py +0 -0
  10. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/dataset.py +0 -0
  11. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/__init__.py +0 -0
  12. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/_run_evaluator.py +0 -0
  13. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/common.py +0 -0
  14. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/context.py +0 -0
  15. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/evaluator.py +0 -0
  16. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/evaluators/spec.py +0 -0
  17. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/generation.py +0 -0
  18. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/__init__.py +0 -0
  19. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/_context_in_memory_span_exporter.py +0 -0
  20. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/_context_subtree.py +0 -0
  21. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/_errors.py +0 -0
  22. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/otel/span_tree.py +0 -0
  23. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/py.typed +0 -0
  24. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pydantic_evals/reporting/render_numbers.py +0 -0
  25. {pydantic_evals-1.3.0 → pydantic_evals-1.4.0}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydantic-evals
- Version: 1.3.0
+ Version: 1.4.0
  Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
  Project-URL: Homepage, https://ai.pydantic.dev/evals
  Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -30,7 +30,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.10
  Requires-Dist: anyio>=0
  Requires-Dist: logfire-api>=3.14.1
- Requires-Dist: pydantic-ai-slim==1.3.0
+ Requires-Dist: pydantic-ai-slim==1.4.0
  Requires-Dist: pydantic>=2.10
  Requires-Dist: pyyaml>=6.0.2
  Requires-Dist: rich>=13.9.4
@@ -0,0 +1,16 @@
+ """A toolkit for evaluating the execution of arbitrary "stochastic functions", such as LLM calls.
+
+ This package provides functionality for:
+ - Creating and loading test datasets with structured inputs and outputs
+ - Evaluating model performance using various metrics and evaluators
+ - Generating reports for evaluation results
+ """
+
+ from .dataset import Case, Dataset, increment_eval_metric, set_eval_attribute
+
+ __all__ = (
+     'Case',
+     'Dataset',
+     'increment_eval_metric',
+     'set_eval_attribute',
+ )
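The hunk above re-exports increment_eval_metric and set_eval_attribute at the package top level. A minimal sketch of how a task under evaluation might use the new exports, assuming Dataset.evaluate_sync and the Case constructor behave as in the pydantic-evals documentation; the task, case contents, and metric names below are made up for illustration:

from pydantic_evals import Case, Dataset, increment_eval_metric, set_eval_attribute

def classify(text: str) -> str:
    # Record custom metrics/attributes against the case currently being evaluated.
    increment_eval_metric('characters_seen', len(text))
    set_eval_attribute('empty_input', text == '')
    return 'positive' if 'good' in text else 'negative'

dataset = Dataset(cases=[Case(name='simple', inputs='a good day', expected_output='positive')])
report = dataset.evaluate_sync(classify)
report.print()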
@@ -201,7 +201,7 @@ async def judge_output_expected(
      ).output


- def set_default_judge_model(model: models.Model | models.KnownModelName) -> None: # pragma: no cover
+ def set_default_judge_model(model: models.Model | models.KnownModelName) -> None:
      """Set the default model used for judging.

      This model is used if `None` is passed to the `model` argument of `judge_output` and `judge_input_output`.
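For context, a minimal sketch of calling set_default_judge_model, assuming the module path shown in this diff; the model name is an illustrative placeholder, not something prescribed by the change:

from pydantic_evals.evaluators.llm_as_a_judge import set_default_judge_model

# Subsequent judge calls that pass model=None fall back to this default.
set_default_judge_model('openai:gpt-4o')  # accepts a Model instance or a known model name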
@@ -289,12 +289,12 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
          metric_configs: dict[str, RenderNumberConfig] | None = None,
          duration_config: RenderNumberConfig | None = None,
          include_reasons: bool = False,
-     ) -> None: # pragma: no cover
+     ) -> None:
          """Print this report to the console, optionally comparing it to a baseline report.

          If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
          """
-         if console is None:
+         if console is None: # pragma: no branch
              console = Console(width=width)

          table = self.console_table(
@@ -318,7 +318,7 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
              include_reasons=include_reasons,
          )
          console.print(table)
-         if include_errors and self.failures:
+         if include_errors and self.failures: # pragma: no cover
              failures_table = self.failures_table(
                  include_input=include_input,
                  include_metadata=include_metadata,
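For reference, a short sketch of the two output paths the docstring above describes: the convenience print method versus rendering the table yourself with console_table and a rich Console. It assumes Dataset.evaluate_sync accepts a plain callable and that print/console_table take the keyword arguments visible in these hunks; everything else is illustrative:

from pydantic_evals import Case, Dataset
from rich.console import Console

dataset = Dataset(cases=[Case(name='example', inputs='hi', expected_output='hi')])
report = dataset.evaluate_sync(lambda text: text)  # identity task, purely illustrative

# Convenience path: print() builds its own Console when none is passed in.
report.print(width=120, include_reasons=True)

# More control: build the table and print it with your own Console.
console = Console(record=True)
console.print(report.console_table(include_input=True, include_metadata=True))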
@@ -1,19 +0,0 @@
- """A toolkit for evaluating the execution of arbitrary "stochastic functions", such as LLM calls.
-
- This package provides functionality for:
- - Creating and loading test datasets with structured inputs and outputs
- - Evaluating model performance using various metrics and evaluators
- - Generating reports for evaluation results
-
- TODO(DavidM): Implement serialization of reports for later comparison, and add git hashes etc.
- Note: I made pydantic_ai.evals.reports.EvalReport a BaseModel specifically to make this easier
- TODO(DavidM): Add commit hash, timestamp, and other metadata to reports (like pytest-speed does), possibly in a dedicated struct
- TODO(DavidM): Implement a CLI with some pytest-like filtering API to make it easier to run only specific cases
- """
-
- from .dataset import Case, Dataset
-
- __all__ = (
-     'Case',
-     'Dataset',
- )