pydantic-evals 0.7.4__py3-none-any.whl → 0.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pydantic-evals might be problematic. Click here for more details.
- pydantic_evals/dataset.py +10 -2
- pydantic_evals/reporting/__init__.py +9 -2
- {pydantic_evals-0.7.4.dist-info → pydantic_evals-0.7.5.dist-info}/METADATA +2 -2
- {pydantic_evals-0.7.4.dist-info → pydantic_evals-0.7.5.dist-info}/RECORD +6 -6
- {pydantic_evals-0.7.4.dist-info → pydantic_evals-0.7.5.dist-info}/WHEEL +0 -0
- {pydantic_evals-0.7.4.dist-info → pydantic_evals-0.7.5.dist-info}/licenses/LICENSE +0 -0
pydantic_evals/dataset.py
CHANGED
|
@@ -297,6 +297,12 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
|
|
|
297
297
|
progress_bar.update(task_id, advance=1)
|
|
298
298
|
return result
|
|
299
299
|
|
|
300
|
+
if (context := eval_span.context) is None: # pragma: no cover
|
|
301
|
+
trace_id = None
|
|
302
|
+
span_id = None
|
|
303
|
+
else:
|
|
304
|
+
trace_id = f'{context.trace_id:032x}'
|
|
305
|
+
span_id = f'{context.span_id:016x}'
|
|
300
306
|
report = EvaluationReport(
|
|
301
307
|
name=name,
|
|
302
308
|
cases=await task_group_gather(
|
|
@@ -305,6 +311,8 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
|
|
|
305
311
|
for i, case in enumerate(self.cases, 1)
|
|
306
312
|
]
|
|
307
313
|
),
|
|
314
|
+
span_id=span_id,
|
|
315
|
+
trace_id=trace_id,
|
|
308
316
|
)
|
|
309
317
|
# TODO(DavidM): This attribute will be too big in general; remove it once we can use child spans in details panel:
|
|
310
318
|
eval_span.set_attribute('cases', _REPORT_CASES_ADAPTER.dump_python(report.cases))
|
|
@@ -929,8 +937,8 @@ async def _run_task_and_evaluators(
|
|
|
929
937
|
|
|
930
938
|
context = case_span.context
|
|
931
939
|
if context is None: # pragma: no cover
|
|
932
|
-
trace_id = ''
|
|
933
|
-
span_id = ''
|
|
940
|
+
trace_id = None
|
|
941
|
+
span_id = None
|
|
934
942
|
else:
|
|
935
943
|
trace_id = f'{context.trace_id:032x}'
|
|
936
944
|
span_id = f'{context.span_id:016x}'
|
|
pydantic_evals/reporting/__init__.py
CHANGED
|
@@ -68,8 +68,8 @@ class ReportCase(Generic[InputsT, OutputT, MetadataT]):
|
|
|
68
68
|
total_duration: float # includes evaluator execution time
|
|
69
69
|
|
|
70
70
|
# TODO(DavidM): Drop these once we can reference child spans in details panel:
|
|
71
|
-
trace_id: str
|
|
72
|
-
span_id: str
|
|
71
|
+
trace_id: str | None
|
|
72
|
+
span_id: str | None
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
ReportCaseAdapter = TypeAdapter(ReportCase[Any, Any, Any])
|
|
@@ -158,9 +158,16 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
|
|
|
158
158
|
|
|
159
159
|
name: str
|
|
160
160
|
"""The name of the report."""
|
|
161
|
+
|
|
161
162
|
cases: list[ReportCase[InputsT, OutputT, MetadataT]]
|
|
162
163
|
"""The cases in the report."""
|
|
163
164
|
|
|
165
|
+
span_id: str | None = None
|
|
166
|
+
"""The span ID of the evaluation."""
|
|
167
|
+
|
|
168
|
+
trace_id: str | None = None
|
|
169
|
+
"""The trace ID of the evaluation."""
|
|
170
|
+
|
|
164
171
|
def averages(self) -> ReportCaseAggregate:
|
|
165
172
|
return ReportCaseAggregate.average(self.cases)
|
|
166
173
|
|
|
{pydantic_evals-0.7.4.dist-info → pydantic_evals-0.7.5.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydantic-evals
|
|
3
|
-
Version: 0.7.4
|
|
3
|
+
Version: 0.7.5
|
|
4
4
|
Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
|
|
5
5
|
Project-URL: Homepage, https://ai.pydantic.dev/evals
|
|
6
6
|
Project-URL: Source, https://github.com/pydantic/pydantic-ai
|
|
@@ -32,7 +32,7 @@ Requires-Python: >=3.9
|
|
|
32
32
|
Requires-Dist: anyio>=0
|
|
33
33
|
Requires-Dist: eval-type-backport>=0; python_version < '3.11'
|
|
34
34
|
Requires-Dist: logfire-api>=3.14.1
|
|
35
|
-
Requires-Dist: pydantic-ai-slim==0.7.4
|
|
35
|
+
Requires-Dist: pydantic-ai-slim==0.7.5
|
|
36
36
|
Requires-Dist: pydantic>=2.10
|
|
37
37
|
Requires-Dist: pyyaml>=6.0.2
|
|
38
38
|
Requires-Dist: rich>=13.9.4
|
|
{pydantic_evals-0.7.4.dist-info → pydantic_evals-0.7.5.dist-info}/RECORD
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
pydantic_evals/__init__.py,sha256=OKRbfhdc8UZPzrPJMZUQwvzIxLhXmEZxz1ZuD921fy4,839
|
|
2
2
|
pydantic_evals/_utils.py,sha256=PfhmPbdQp-q90s568LuG45zDDXxgO13BEz8MQJK8qw4,2922
|
|
3
|
-
pydantic_evals/dataset.py,sha256=
|
|
3
|
+
pydantic_evals/dataset.py,sha256=NSEKJLqKfVVu0YpmPJsb6rygwgglyc7h6_dIZAsKqqk,47036
|
|
4
4
|
pydantic_evals/generation.py,sha256=Yd1rfbsDjjBBHDk-1KDu48hlITjM2-74rTnPBD_sqbA,3494
|
|
5
5
|
pydantic_evals/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
pydantic_evals/evaluators/__init__.py,sha256=k_sTzRezIOUj7L2x3WC0_r8943jRo72uqb2vKplD5EU,660
|
|
@@ -15,9 +15,9 @@ pydantic_evals/otel/_context_in_memory_span_exporter.py,sha256=sQBaIH7NM_bCPnew9
|
|
|
15
15
|
pydantic_evals/otel/_context_subtree.py,sha256=Iazp4w3IIBMCrkqWL-hTG-2QG_-2X81p794WG9MAsGk,1175
|
|
16
16
|
pydantic_evals/otel/_errors.py,sha256=aW1414eTofpA7R_DUgOeT-gj7YA6OXmm8Y4oYeFukD4,268
|
|
17
17
|
pydantic_evals/otel/span_tree.py,sha256=LV5Hsyo4riJzevHyBz8wxP82S-ry5zeKYi9bKWjGCS8,23057
|
|
18
|
-
pydantic_evals/reporting/__init__.py,sha256=
|
|
18
|
+
pydantic_evals/reporting/__init__.py,sha256=J6xdy6b1MG1kk47Za3oKu0c-RXD8jEi2TYGkRU1_qZU,43396
|
|
19
19
|
pydantic_evals/reporting/render_numbers.py,sha256=8SKlK3etbD7HnSWWHCE993ceCNLZCepVQ-SsqUIhyxk,6916
|
|
20
|
-
pydantic_evals-0.7.
|
|
21
|
-
pydantic_evals-0.7.
|
|
22
|
-
pydantic_evals-0.7.
|
|
23
|
-
pydantic_evals-0.7.
|
|
20
|
+
pydantic_evals-0.7.5.dist-info/METADATA,sha256=9fkNsIxmLfG5mXsu9u62AAZEK0AiuVeVwWTG9D5EX3U,7942
|
|
21
|
+
pydantic_evals-0.7.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
+
pydantic_evals-0.7.5.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
|
|
23
|
+
pydantic_evals-0.7.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|