pydantic-evals 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pydantic-evals might be problematic.

Files changed (24)
  1. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/PKG-INFO +2 -2
  2. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/dataset.py +10 -5
  3. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/.gitignore +0 -0
  4. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/LICENSE +0 -0
  5. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/README.md +0 -0
  6. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/__init__.py +0 -0
  7. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/_utils.py +0 -0
  8. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/__init__.py +0 -0
  9. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/_run_evaluator.py +0 -0
  10. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/_spec.py +0 -0
  11. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/common.py +0 -0
  12. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/context.py +0 -0
  13. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/evaluator.py +0 -0
  14. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/evaluators/llm_as_a_judge.py +0 -0
  15. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/generation.py +0 -0
  16. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/otel/__init__.py +0 -0
  17. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/otel/_context_in_memory_span_exporter.py +0 -0
  18. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/otel/_context_subtree.py +0 -0
  19. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/otel/_errors.py +0 -0
  20. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/otel/span_tree.py +0 -0
  21. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/py.typed +0 -0
  22. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/reporting/__init__.py +0 -0
  23. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pydantic_evals/reporting/render_numbers.py +0 -0
  24. {pydantic_evals-0.4.0 → pydantic_evals-0.4.2}/pyproject.toml +0 -0
--- pydantic_evals-0.4.0/PKG-INFO
+++ pydantic_evals-0.4.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 0.4.0
+Version: 0.4.2
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -32,7 +32,7 @@ Requires-Python: >=3.9
 Requires-Dist: anyio>=0
 Requires-Dist: eval-type-backport>=0; python_version < '3.11'
 Requires-Dist: logfire-api>=1.2.0
-Requires-Dist: pydantic-ai-slim==0.4.0
+Requires-Dist: pydantic-ai-slim==0.4.2
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4
--- pydantic_evals-0.4.0/pydantic_evals/dataset.py
+++ pydantic_evals-0.4.2/pydantic_evals/dataset.py
@@ -18,12 +18,14 @@ from collections.abc import Awaitable, Mapping, Sequence
 from contextlib import AsyncExitStack, nullcontext
 from contextvars import ContextVar
 from dataclasses import dataclass, field
+from inspect import iscoroutinefunction
 from pathlib import Path
 from typing import Any, Callable, Generic, Literal, Union, cast
 
 import anyio
 import logfire_api
 import yaml
+from anyio import to_thread
 from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, model_serializer
 from pydantic._internal import _typing_extra
 from pydantic_core import to_json
@@ -253,7 +255,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
 
     async def evaluate(
         self,
-        task: Callable[[InputsT], Awaitable[OutputT]],
+        task: Callable[[InputsT], Awaitable[OutputT]] | Callable[[InputsT], OutputT],
         name: str | None = None,
         max_concurrency: int | None = None,
         progress: bool = True,
@@ -308,7 +310,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
 
     def evaluate_sync(
         self,
-        task: Callable[[InputsT], Awaitable[OutputT]],
+        task: Callable[[InputsT], Awaitable[OutputT]] | Callable[[InputsT], OutputT],
         name: str | None = None,
         max_concurrency: int | None = None,
         progress: bool = True,
@@ -811,7 +813,7 @@ class _TaskRun:
 
 
 async def _run_task(
-    task: Callable[[InputsT], Awaitable[OutputT]], case: Case[InputsT, OutputT, MetadataT]
+    task: Callable[[InputsT], Awaitable[OutputT] | OutputT], case: Case[InputsT, OutputT, MetadataT]
 ) -> EvaluatorContext[InputsT, OutputT, MetadataT]:
     """Run a task on a case and return the context for evaluators.
 
@@ -836,7 +838,10 @@ async def _run_task(
         with _logfire.span('execute {task}', task=get_unwrapped_function_name(task)) as task_span:
             with context_subtree() as span_tree:
                 t0 = time.perf_counter()
-                task_output = await task(case.inputs)
+                if iscoroutinefunction(task):
+                    task_output = cast(OutputT, await task(case.inputs))
+                else:
+                    task_output = cast(OutputT, await to_thread.run_sync(task, case.inputs))
                 fallback_duration = time.perf_counter() - t0
     finally:
         _CURRENT_TASK_RUN.reset(token)
@@ -873,7 +878,7 @@ async def _run_task(
 
 
 async def _run_task_and_evaluators(
-    task: Callable[[InputsT], Awaitable[OutputT]],
+    task: Callable[[InputsT], Awaitable[OutputT]] | Callable[[InputsT], OutputT],
     case: Case[InputsT, OutputT, MetadataT],
     report_case_name: str,
     dataset_evaluators: list[Evaluator[InputsT, OutputT, MetadataT]],
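
Taken together, the dataset.py changes above widen Dataset.evaluate and Dataset.evaluate_sync to accept a plain synchronous task as well as an async one: _run_task now checks iscoroutinefunction(task) and awaits coroutine functions directly, while synchronous callables are dispatched to a worker thread via anyio's to_thread.run_sync. Below is a minimal usage sketch of what the widened signature permits, based on the public Case/Dataset API from the pydantic-evals documentation; the guess_capital task and its cases are made up for illustration.

from pydantic_evals import Case, Dataset


def guess_capital(country: str) -> str:
    # A plain synchronous task; before this change the task had to be an async function.
    return {'France': 'Paris', 'Japan': 'Tokyo'}.get(country, 'unknown')


dataset = Dataset(
    cases=[
        Case(name='france', inputs='France', expected_output='Paris'),
        Case(name='japan', inputs='Japan', expected_output='Tokyo'),
    ]
)

# evaluate_sync now accepts the sync callable directly; per the diff above it is
# executed on a worker thread via anyio.to_thread.run_sync rather than awaited.
report = dataset.evaluate_sync(guess_capital)
report.print()

With 0.4.0, passing guess_capital here would fail: the old _run_task unconditionally awaited task(case.inputs), which raises a TypeError when the task returns a plain string instead of an awaitable.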