pydantic-evals 1.0.17-py3-none-any.whl → 1.1.0-py3-none-any.whl
This diff shows the content of publicly available package versions as published to their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of pydantic-evals might be problematic.
- pydantic_evals/reporting/__init__.py +64 -6
- {pydantic_evals-1.0.17.dist-info → pydantic_evals-1.1.0.dist-info}/METADATA +2 -2
- {pydantic_evals-1.0.17.dist-info → pydantic_evals-1.1.0.dist-info}/RECORD +5 -5
- {pydantic_evals-1.0.17.dist-info → pydantic_evals-1.1.0.dist-info}/WHEEL +0 -0
- {pydantic_evals-1.0.17.dist-info → pydantic_evals-1.1.0.dist-info}/licenses/LICENSE +0 -0
pydantic_evals/reporting/__init__.py

@@ -206,11 +206,70 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
             return ReportCaseAggregate.average(self.cases)
         return None
 
+    def render(
+        self,
+        width: int | None = None,
+        baseline: EvaluationReport[InputsT, OutputT, MetadataT] | None = None,
+        *,
+        include_input: bool = False,
+        include_metadata: bool = False,
+        include_expected_output: bool = False,
+        include_output: bool = False,
+        include_durations: bool = True,
+        include_total_duration: bool = False,
+        include_removed_cases: bool = False,
+        include_averages: bool = True,
+        include_errors: bool = True,
+        include_error_stacktrace: bool = False,
+        include_evaluator_failures: bool = True,
+        input_config: RenderValueConfig | None = None,
+        metadata_config: RenderValueConfig | None = None,
+        output_config: RenderValueConfig | None = None,
+        score_configs: dict[str, RenderNumberConfig] | None = None,
+        label_configs: dict[str, RenderValueConfig] | None = None,
+        metric_configs: dict[str, RenderNumberConfig] | None = None,
+        duration_config: RenderNumberConfig | None = None,
+        include_reasons: bool = False,
+    ) -> str:  # pragma: no cover
+        """Render this report to a nicely-formatted string, optionally comparing it to a baseline report.
+
+        If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
+        """
+        io_file = StringIO()
+        console = Console(width=width, file=io_file)
+        self.print(
+            width=width,
+            baseline=baseline,
+            console=console,
+            include_input=include_input,
+            include_metadata=include_metadata,
+            include_expected_output=include_expected_output,
+            include_output=include_output,
+            include_durations=include_durations,
+            include_total_duration=include_total_duration,
+            include_removed_cases=include_removed_cases,
+            include_averages=include_averages,
+            include_errors=include_errors,
+            include_error_stacktrace=include_error_stacktrace,
+            include_evaluator_failures=include_evaluator_failures,
+            input_config=input_config,
+            metadata_config=metadata_config,
+            output_config=output_config,
+            score_configs=score_configs,
+            label_configs=label_configs,
+            metric_configs=metric_configs,
+            duration_config=duration_config,
+            include_reasons=include_reasons,
+        )
+        Console(file=io_file)
+        return io_file.getvalue()
+
     def print(
         self,
         width: int | None = None,
         baseline: EvaluationReport[InputsT, OutputT, MetadataT] | None = None,
         *,
+        console: Console | None = None,
         include_input: bool = False,
         include_metadata: bool = False,
         include_expected_output: bool = False,
@@ -230,11 +289,14 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
         metric_configs: dict[str, RenderNumberConfig] | None = None,
         duration_config: RenderNumberConfig | None = None,
         include_reasons: bool = False,
-    ):  # pragma: no cover
+    ) -> None:  # pragma: no cover
         """Print this report to the console, optionally comparing it to a baseline report.
 
         If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
         """
+        if console is None:
+            console = Console(width=width)
+
         table = self.console_table(
             baseline=baseline,
             include_input=include_input,
@@ -255,7 +317,6 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
             duration_config=duration_config,
             include_reasons=include_reasons,
         )
-        console = Console(width=width)
         console.print(table)
         if include_errors and self.failures:
             failures_table = self.failures_table(
@@ -358,10 +419,7 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
 
     def __str__(self) -> str:  # pragma: lax no cover
         """Return a string representation of the report."""
-        table = self.console_table()
-        io_file = StringIO()
-        Console(file=io_file).print(table)
-        return io_file.getvalue()
+        return self.render()
 
 
 EvaluationReportAdapter = TypeAdapter(EvaluationReport[Any, Any, Any])
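Taken together, these hunks move console construction out of print(), which now accepts an optional console argument, and add a render() method that captures the same table output as a string; __str__ now delegates to render(). A minimal usage sketch against the new API (the dataset and task below are illustrative, not part of this diff):

    from rich.console import Console

    from pydantic_evals import Case, Dataset

    # Illustrative dataset and task; any evaluation yields an EvaluationReport.
    dataset = Dataset(cases=[Case(name='upper', inputs='hello', expected_output='HELLO')])
    report = dataset.evaluate_sync(lambda text: text.upper())

    # New in 1.1.0: render() returns the formatted report as a string.
    text = report.render(width=120, include_input=True, include_output=True)

    # New in 1.1.0: print() accepts an explicit Console, e.g. one that records output.
    console = Console(record=True)
    report.print(console=console)
    html = console.export_html()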
{pydantic_evals-1.0.17.dist-info → pydantic_evals-1.1.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 1.0.17
+Version: 1.1.0
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -30,7 +30,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Requires-Dist: anyio>=0
 Requires-Dist: logfire-api>=3.14.1
-Requires-Dist: pydantic-ai-slim==1.0
+Requires-Dist: pydantic-ai-slim==1.1.0
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4
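Note the exact pin on pydantic-ai-slim; the two distributions are versioned together, so upgrading pydantic-evals pulls in the matching pydantic-ai-slim. A quick post-upgrade sanity check (assumes both packages are installed):

    from importlib.metadata import version

    # The exact pin keeps both distributions at the same version.
    assert version('pydantic-evals') == version('pydantic-ai-slim') == '1.1.0'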
{pydantic_evals-1.0.17.dist-info → pydantic_evals-1.1.0.dist-info}/RECORD

@@ -15,9 +15,9 @@ pydantic_evals/otel/_context_in_memory_span_exporter.py,sha256=FrG0pXKjuvTp3bXNd
 pydantic_evals/otel/_context_subtree.py,sha256=Iazp4w3IIBMCrkqWL-hTG-2QG_-2X81p794WG9MAsGk,1175
 pydantic_evals/otel/_errors.py,sha256=aW1414eTofpA7R_DUgOeT-gj7YA6OXmm8Y4oYeFukD4,268
 pydantic_evals/otel/span_tree.py,sha256=RzX4VGpEqc2QUhkyxMTXtBRo5yHHO1c0hI7QJJuiXPU,23043
-pydantic_evals/reporting/__init__.py,sha256=
+pydantic_evals/reporting/__init__.py,sha256=702W2BjMiXhKQz6T4sor6Zi2SjYTDQypCvealJrwTFA,54067
 pydantic_evals/reporting/render_numbers.py,sha256=8SKlK3etbD7HnSWWHCE993ceCNLZCepVQ-SsqUIhyxk,6916
-pydantic_evals-1.0.
-pydantic_evals-1.0.
-pydantic_evals-1.0.
-pydantic_evals-1.0.
+pydantic_evals-1.1.0.dist-info/METADATA,sha256=fwS-kXrKQIf2FC2jHnoTfp4hIVoJUTfzmzHi4TyQ2Ys,7844
+pydantic_evals-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+pydantic_evals-1.1.0.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
+pydantic_evals-1.1.0.dist-info/RECORD,,
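For reference, the sha256= values in RECORD are URL-safe base64 digests with the trailing '=' padding stripped, per the wheel RECORD format; a sketch of how such an entry could be recomputed (record_entry is our own helper name):

    import base64
    import hashlib
    from pathlib import Path

    def record_entry(path: str) -> str:
        # RECORD rows are: path,sha256=<unpadded urlsafe-b64 digest>,<size in bytes>
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=').decode()
        return f'{path},sha256={digest},{len(data)}'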