pydantic-evals 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydantic_evals/dataset.py +10 -5
- {pydantic_evals-0.4.0.dist-info → pydantic_evals-0.4.1.dist-info}/METADATA +2 -2
- {pydantic_evals-0.4.0.dist-info → pydantic_evals-0.4.1.dist-info}/RECORD +5 -5
- {pydantic_evals-0.4.0.dist-info → pydantic_evals-0.4.1.dist-info}/WHEEL +0 -0
- {pydantic_evals-0.4.0.dist-info → pydantic_evals-0.4.1.dist-info}/licenses/LICENSE +0 -0
pydantic_evals/dataset.py
CHANGED
|
@@ -18,12 +18,14 @@ from collections.abc import Awaitable, Mapping, Sequence
|
|
|
18
18
|
from contextlib import AsyncExitStack, nullcontext
|
|
19
19
|
from contextvars import ContextVar
|
|
20
20
|
from dataclasses import dataclass, field
|
|
21
|
+
from inspect import iscoroutinefunction
|
|
21
22
|
from pathlib import Path
|
|
22
23
|
from typing import Any, Callable, Generic, Literal, Union, cast
|
|
23
24
|
|
|
24
25
|
import anyio
|
|
25
26
|
import logfire_api
|
|
26
27
|
import yaml
|
|
28
|
+
from anyio import to_thread
|
|
27
29
|
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, model_serializer
|
|
28
30
|
from pydantic._internal import _typing_extra
|
|
29
31
|
from pydantic_core import to_json
|
|
@@ -253,7 +255,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
|
|
|
253
255
|
|
|
254
256
|
async def evaluate(
|
|
255
257
|
self,
|
|
256
|
-
task: Callable[[InputsT], Awaitable[OutputT]],
|
|
258
|
+
task: Callable[[InputsT], Awaitable[OutputT]] | Callable[[InputsT], OutputT],
|
|
257
259
|
name: str | None = None,
|
|
258
260
|
max_concurrency: int | None = None,
|
|
259
261
|
progress: bool = True,
|
|
@@ -308,7 +310,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
|
|
|
308
310
|
|
|
309
311
|
def evaluate_sync(
|
|
310
312
|
self,
|
|
311
|
-
task: Callable[[InputsT], Awaitable[OutputT]],
|
|
313
|
+
task: Callable[[InputsT], Awaitable[OutputT]] | Callable[[InputsT], OutputT],
|
|
312
314
|
name: str | None = None,
|
|
313
315
|
max_concurrency: int | None = None,
|
|
314
316
|
progress: bool = True,
|
|
@@ -811,7 +813,7 @@ class _TaskRun:
|
|
|
811
813
|
|
|
812
814
|
|
|
813
815
|
async def _run_task(
|
|
814
|
-
task: Callable[[InputsT], Awaitable[OutputT]], case: Case[InputsT, OutputT, MetadataT]
|
|
816
|
+
task: Callable[[InputsT], Awaitable[OutputT] | OutputT], case: Case[InputsT, OutputT, MetadataT]
|
|
815
817
|
) -> EvaluatorContext[InputsT, OutputT, MetadataT]:
|
|
816
818
|
"""Run a task on a case and return the context for evaluators.
|
|
817
819
|
|
|
@@ -836,7 +838,10 @@ async def _run_task(
|
|
|
836
838
|
with _logfire.span('execute {task}', task=get_unwrapped_function_name(task)) as task_span:
|
|
837
839
|
with context_subtree() as span_tree:
|
|
838
840
|
t0 = time.perf_counter()
|
|
839
|
-
|
|
841
|
+
if iscoroutinefunction(task):
|
|
842
|
+
task_output = cast(OutputT, await task(case.inputs))
|
|
843
|
+
else:
|
|
844
|
+
task_output = cast(OutputT, await to_thread.run_sync(task, case.inputs))
|
|
840
845
|
fallback_duration = time.perf_counter() - t0
|
|
841
846
|
finally:
|
|
842
847
|
_CURRENT_TASK_RUN.reset(token)
|
|
@@ -873,7 +878,7 @@ async def _run_task(
|
|
|
873
878
|
|
|
874
879
|
|
|
875
880
|
async def _run_task_and_evaluators(
|
|
876
|
-
task: Callable[[InputsT], Awaitable[OutputT]],
|
|
881
|
+
task: Callable[[InputsT], Awaitable[OutputT]] | Callable[[InputsT], OutputT],
|
|
877
882
|
case: Case[InputsT, OutputT, MetadataT],
|
|
878
883
|
report_case_name: str,
|
|
879
884
|
dataset_evaluators: list[Evaluator[InputsT, OutputT, MetadataT]],
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydantic-evals
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
|
|
5
5
|
Project-URL: Homepage, https://ai.pydantic.dev/evals
|
|
6
6
|
Project-URL: Source, https://github.com/pydantic/pydantic-ai
|
|
@@ -32,7 +32,7 @@ Requires-Python: >=3.9
|
|
|
32
32
|
Requires-Dist: anyio>=0
|
|
33
33
|
Requires-Dist: eval-type-backport>=0; python_version < '3.11'
|
|
34
34
|
Requires-Dist: logfire-api>=1.2.0
|
|
35
|
-
Requires-Dist: pydantic-ai-slim==0.4.0
|
|
35
|
+
Requires-Dist: pydantic-ai-slim==0.4.1
|
|
36
36
|
Requires-Dist: pydantic>=2.10
|
|
37
37
|
Requires-Dist: pyyaml>=6.0.2
|
|
38
38
|
Requires-Dist: rich>=13.9.4
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
pydantic_evals/__init__.py,sha256=OKRbfhdc8UZPzrPJMZUQwvzIxLhXmEZxz1ZuD921fy4,839
|
|
2
2
|
pydantic_evals/_utils.py,sha256=PfhmPbdQp-q90s568LuG45zDDXxgO13BEz8MQJK8qw4,2922
|
|
3
|
-
pydantic_evals/dataset.py,sha256
|
|
3
|
+
pydantic_evals/dataset.py,sha256=SY0k2htYG0d0KRRem3pnQdN7rPztJ_TCFnCb0zkXbCk,46477
|
|
4
4
|
pydantic_evals/generation.py,sha256=-w-4-zpJuW8mLj5ed60LUYm--b-2G42p-UDuPhOQgRE,3492
|
|
5
5
|
pydantic_evals/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
pydantic_evals/evaluators/__init__.py,sha256=uj110viFiDvqrIcuDcWexk_uBgJqhKMGPHT2YvDb7LA,624
|
|
@@ -17,7 +17,7 @@ pydantic_evals/otel/_errors.py,sha256=aW1414eTofpA7R_DUgOeT-gj7YA6OXmm8Y4oYeFukD
|
|
|
17
17
|
pydantic_evals/otel/span_tree.py,sha256=LV5Hsyo4riJzevHyBz8wxP82S-ry5zeKYi9bKWjGCS8,23057
|
|
18
18
|
pydantic_evals/reporting/__init__.py,sha256=k_3tteqXGh0yGvgpN68gB0CjG9wzrakzDTve2GHend4,42148
|
|
19
19
|
pydantic_evals/reporting/render_numbers.py,sha256=8SKlK3etbD7HnSWWHCE993ceCNLZCepVQ-SsqUIhyxk,6916
|
|
20
|
-
pydantic_evals-0.4.
|
|
21
|
-
pydantic_evals-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
-
pydantic_evals-0.4.0.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
|
|
23
|
-
pydantic_evals-0.4.0.dist-info/RECORD,,
|
|
20
|
+
pydantic_evals-0.4.1.dist-info/METADATA,sha256=IXq49FDCjJBQQ_mMPuZyljAXQaDvN-OZk21js1DlN9Q,7785
|
|
21
|
+
pydantic_evals-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
+
pydantic_evals-0.4.1.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
|
|
23
|
+
pydantic_evals-0.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|