pydantic-evals 1.1.0.tar.gz → 1.2.0.tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/PKG-INFO +2 -2
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/generation.py +3 -1
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/.gitignore +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/LICENSE +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/README.md +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/__init__.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/_utils.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/dataset.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/__init__.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/_run_evaluator.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/common.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/context.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/evaluator.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/llm_as_a_judge.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/evaluators/spec.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/otel/__init__.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/otel/_context_in_memory_span_exporter.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/otel/_context_subtree.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/otel/_errors.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/otel/span_tree.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/py.typed +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/reporting/__init__.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pydantic_evals/reporting/render_numbers.py +0 -0
- {pydantic_evals-1.1.0 → pydantic_evals-1.2.0}/pyproject.toml +0 -0
--- pydantic_evals-1.1.0/PKG-INFO
+++ pydantic_evals-1.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 1.1.0
+Version: 1.2.0
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -30,7 +30,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Requires-Dist: anyio>=0
 Requires-Dist: logfire-api>=3.14.1
-Requires-Dist: pydantic-ai-slim==1.1.0
+Requires-Dist: pydantic-ai-slim==1.2.0
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4
--- pydantic_evals-1.1.0/pydantic_evals/generation.py
+++ pydantic_evals-1.2.0/pydantic_evals/generation.py
@@ -14,6 +14,7 @@ from pydantic import ValidationError
 from typing_extensions import TypeVar
 
 from pydantic_ai import Agent, models
+from pydantic_ai._utils import strip_markdown_fences
 from pydantic_evals import Dataset
 from pydantic_evals.evaluators.evaluator import Evaluator
 
@@ -73,8 +74,9 @@ async def generate_dataset(
     )
 
     result = await agent.run(extra_instructions or 'Please generate the object.')
+    output = strip_markdown_fences(result.output)
     try:
-        result = dataset_type.from_text(result.output, fmt='json', custom_evaluator_types=custom_evaluator_types)
+        result = dataset_type.from_text(output, fmt='json', custom_evaluator_types=custom_evaluator_types)
     except ValidationError as e:  # pragma: no cover
         print(f'Raw response from model:\n{result.output}')
         raise e
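The functional change in this release is small but worth spelling out: models often wrap a JSON answer in a Markdown-fenced code block, which previously made `Dataset.from_text(..., fmt='json')` fail with a `ValidationError`. As of 1.2.0, the raw model output is passed through `strip_markdown_fences` before parsing. Since `pydantic_ai._utils.strip_markdown_fences` is a private helper whose implementation is not shown in this diff, the function below is only a hypothetical sketch of the behavior the fix relies on:

```python
import re


def strip_markdown_fences_sketch(text: str) -> str:
    """Hypothetical stand-in for pydantic_ai._utils.strip_markdown_fences.

    If the model wrapped its answer in a ```json ... ``` (or bare ```)
    block, return only the fenced body; otherwise return the text as-is.
    """
    match = re.search(r'```(?:json)?\n(.*?)\n```', text, re.DOTALL)
    return match.group(1) if match else text


# Example: a model response that 1.1.0 would have failed to parse as JSON.
raw = '```json\n{"cases": [], "evaluators": []}\n```'
print(strip_markdown_fences_sketch(raw))  # -> {"cases": [], "evaluators": []}
```

Note that the `try`/`except` block still prints `result.output` (the unstripped text) on failure, so the error message shows exactly what the model returned, fences and all.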