pydantic-evals 1.0.0b1__tar.gz → 1.0.2__tar.gz

This diff shows the changes between publicly released package versions as they appear in their public registries, and is provided for informational purposes only.

Potentially problematic release: this version of pydantic-evals might be problematic.

Files changed (24)
  1. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/PKG-INFO +3 -4
  2. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/_utils.py +34 -2
  3. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/dataset.py +4 -4
  4. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/__init__.py +8 -2
  5. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/_run_evaluator.py +3 -5
  6. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/common.py +8 -18
  7. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/_context_in_memory_span_exporter.py +1 -1
  8. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pyproject.toml +1 -2
  9. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/.gitignore +0 -0
  10. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/LICENSE +0 -0
  11. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/README.md +0 -0
  12. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/__init__.py +0 -0
  13. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/context.py +0 -0
  14. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/evaluator.py +0 -0
  15. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/llm_as_a_judge.py +0 -0
  16. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/spec.py +0 -0
  17. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/generation.py +0 -0
  18. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/__init__.py +0 -0
  19. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/_context_subtree.py +0 -0
  20. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/_errors.py +0 -0
  21. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/span_tree.py +0 -0
  22. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/py.typed +0 -0
  23. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/reporting/__init__.py +0 -0
  24. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/reporting/render_numbers.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydantic-evals
- Version: 1.0.0b1
+ Version: 1.0.2
  Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
  Project-URL: Homepage, https://ai.pydantic.dev/evals
  Project-URL: Source, https://github.com/pydantic/pydantic-ai

@@ -9,7 +9,7 @@ Project-URL: Changelog, https://github.com/pydantic/pydantic-ai/releases
  Author-email: Samuel Colvin <samuel@pydantic.dev>, Marcelo Trylesinski <marcelotryle@gmail.com>, David Montague <david@pydantic.dev>, Alex Hall <alex@pydantic.dev>, Douwe Maan <douwe@pydantic.dev>
  License-Expression: MIT
  License-File: LICENSE
- Classifier: Development Status :: 4 - Beta
+ Classifier: Development Status :: 5 - Production/Stable
  Classifier: Environment :: Console
  Classifier: Environment :: MacOS X
  Classifier: Intended Audience :: Developers

@@ -29,9 +29,8 @@ Classifier: Topic :: Internet
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.10
  Requires-Dist: anyio>=0
- Requires-Dist: eval-type-backport>=0; python_version < '3.11'
  Requires-Dist: logfire-api>=3.14.1
- Requires-Dist: pydantic-ai-slim==1.0.0b1
+ Requires-Dist: pydantic-ai-slim==1.0.2
  Requires-Dist: pydantic>=2.10
  Requires-Dist: pyyaml>=6.0.2
  Requires-Dist: rich>=13.9.4
pydantic_evals/_utils.py

@@ -2,13 +2,20 @@ from __future__ import annotations as _annotations

  import asyncio
  import inspect
- from collections.abc import Awaitable, Callable, Sequence
+ import warnings
+ from collections.abc import Awaitable, Callable, Generator, Sequence
+ from contextlib import contextmanager
  from functools import partial
- from typing import Any, TypeVar
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any, TypeVar

  import anyio
+ import logfire_api
  from typing_extensions import ParamSpec, TypeIs

+ _logfire = logfire_api.Logfire(otel_scope='pydantic-evals')
+ logfire_api.add_non_user_code_prefix(Path(__file__).parent.absolute())
+

  class Unset:
      """A singleton to represent an unset value.

@@ -101,3 +108,28 @@ async def task_group_gather(tasks: Sequence[Callable[[], Awaitable[T]]]) -> list
              tg.start_soon(_run_task, task, i)

      return results
+
+
+ try:
+     from logfire._internal.config import (
+         LogfireNotConfiguredWarning,  # pyright: ignore[reportAssignmentType,reportPrivateImportUsage]
+     )
+ # TODO: Remove this `pragma: no cover` once we test evals without pydantic-ai (which includes logfire)
+ except ImportError:  # pragma: no cover
+
+     class LogfireNotConfiguredWarning(UserWarning):
+         pass
+
+
+ if TYPE_CHECKING:
+     logfire_span = _logfire.span
+ else:
+
+     @contextmanager
+     def logfire_span(*args: Any, **kwargs: Any) -> Generator[logfire_api.LogfireSpan, None, None]:
+         """Create a Logfire span without warning if logfire is not configured."""
+         # TODO: Remove once Logfire has the ability to suppress this warning from non-user code
+         with warnings.catch_warnings():
+             warnings.filterwarnings('ignore', category=LogfireNotConfiguredWarning)
+             with _logfire.span(*args, **kwargs) as span:
+                 yield span
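
The `logfire_span` wrapper relies on plain stdlib `warnings` filtering scoped by `catch_warnings()`. A minimal self-contained sketch of the same pattern, where `DemoWarning` and `quiet_block` are hypothetical stand-ins for `LogfireNotConfiguredWarning` and the wrapper:

```python
import warnings
from contextlib import contextmanager


class DemoWarning(UserWarning):
    """Hypothetical stand-in for LogfireNotConfiguredWarning."""


@contextmanager
def quiet_block():
    # catch_warnings() snapshots the global warning-filter state and restores
    # it on exit, so the 'ignore' filter only applies inside this context.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DemoWarning)
        yield


with quiet_block():
    warnings.warn('suppressed', DemoWarning)  # silenced by the scoped filter
warnings.warn('still visible', DemoWarning)   # emitted normally
```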
pydantic_evals/dataset.py

@@ -36,7 +36,7 @@ from typing_extensions import NotRequired, Self, TypedDict, TypeVar

  from pydantic_evals._utils import get_event_loop

- from ._utils import get_unwrapped_function_name, task_group_gather
+ from ._utils import get_unwrapped_function_name, logfire_span, task_group_gather
  from .evaluators import EvaluationResult, Evaluator
  from .evaluators._run_evaluator import run_evaluator
  from .evaluators.common import DEFAULT_EVALUATORS

@@ -283,7 +283,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
          limiter = anyio.Semaphore(max_concurrency) if max_concurrency is not None else AsyncExitStack()

          with (
-             _logfire.span('evaluate {name}', name=name, n_cases=len(self.cases)) as eval_span,
+             logfire_span('evaluate {name}', name=name, n_cases=len(self.cases)) as eval_span,
              progress_bar or nullcontext(),
          ):
              task_id = progress_bar.add_task(f'Evaluating {name}', total=total_cases) if progress_bar else None

@@ -858,7 +858,7 @@ async def _run_task(
      token = _CURRENT_TASK_RUN.set(task_run_)
      try:
          with (
-             _logfire.span('execute {task}', task=get_unwrapped_function_name(task)) as task_span,
+             logfire_span('execute {task}', task=get_unwrapped_function_name(task)) as task_span,
              context_subtree() as span_tree_,
          ):
              t0 = time.perf_counter()

@@ -933,7 +933,7 @@ async def _run_task_and_evaluators(
      trace_id: str | None = None
      span_id: str | None = None
      try:
-         with _logfire.span(
+         with logfire_span(
              'case: {case_name}',
              task_name=get_unwrapped_function_name(task),
              case_name=report_case_name,
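
The practical effect for users: evaluation runs no longer emit `LogfireNotConfiguredWarning` for the library's own `evaluate`, `case`, and `execute` spans when logfire is installed but not configured. A hedged usage sketch (the `Case`/`Dataset`/`evaluate_sync` names follow the published pydantic-evals API; treat the details as illustrative):

```python
from pydantic_evals import Case, Dataset

dataset = Dataset(cases=[Case(name='square', inputs=3, expected_output=9)])


async def task(n: int) -> int:
    return n * n


# Before this change, running without logfire.configure() could surface
# LogfireNotConfiguredWarning from the library's internal spans; now the
# logfire_span wrapper suppresses it and the spans are simply discarded.
report = dataset.evaluate_sync(task)
report.print()
```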
pydantic_evals/evaluators/__init__.py

@@ -7,7 +7,6 @@ from .common import (
      LLMJudge,
      MaxDuration,
      OutputConfig,
-     Python,
  )
  from .context import EvaluatorContext
  from .evaluator import EvaluationReason, EvaluationResult, Evaluator, EvaluatorFailure, EvaluatorOutput, EvaluatorSpec

@@ -22,7 +21,6 @@ __all__ = (
      'LLMJudge',
      'HasMatchingSpan',
      'OutputConfig',
-     'Python',
      # context
      'EvaluatorContext',
      # evaluator

@@ -34,3 +32,11 @@ __all__ = (
      'EvaluationReason',
      'EvaluationResult',
  )
+
+
+ def __getattr__(name: str):
+     if name == 'Python':
+         raise ImportError(
+             'The `Python` evaluator has been removed for security reasons. See https://github.com/pydantic/pydantic-ai/pull/2808 for more details and a workaround.'
+         )
+     raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
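
The redirect uses PEP 562 module-level `__getattr__`, which Python (3.7+) calls only when normal module attribute lookup fails, so both `from pydantic_evals.evaluators import Python` and `evaluators.Python` now fail with the migration message instead of a bare `AttributeError`. A minimal sketch of the same pattern in a hypothetical module:

```python
# removed_names.py -- hypothetical module demonstrating the PEP 562 hook
_REMOVED = {
    'Python': 'removed for security reasons; see pydantic-ai PR #2808 for a workaround',
}


def __getattr__(name: str):
    # Only reached when `name` is not an actual attribute of this module.
    if name in _REMOVED:
        raise ImportError(f'`{name}` has been {_REMOVED[name]}')
    raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
```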
pydantic_evals/evaluators/_run_evaluator.py

@@ -2,16 +2,16 @@ from __future__ import annotations

  import traceback
  from collections.abc import Mapping
- from pathlib import Path
  from typing import TYPE_CHECKING, Any

- import logfire_api
  from pydantic import (
      TypeAdapter,
      ValidationError,
  )
  from typing_extensions import TypeVar

+ from pydantic_evals._utils import logfire_span
+
  from .context import EvaluatorContext
  from .evaluator import (
      EvaluationReason,

@@ -25,8 +25,6 @@ from .evaluator import (
  if TYPE_CHECKING:
      from pydantic_ai.retries import RetryConfig

- _logfire = logfire_api.Logfire(otel_scope='pydantic-evals')
- logfire_api.add_non_user_code_prefix(Path(__file__).parent.absolute())

  InputsT = TypeVar('InputsT', default=Any, contravariant=True)
  OutputT = TypeVar('OutputT', default=Any, contravariant=True)

@@ -62,7 +60,7 @@ async def run_evaluator(
          evaluate = tenacity_retry(**retry)(evaluate)

      try:
-         with _logfire.span(
+         with logfire_span(
              'evaluator: {evaluator_name}',
              evaluator_name=evaluator.get_default_evaluation_name(),
          ):
pydantic_evals/evaluators/common.py

@@ -21,7 +21,6 @@ __all__ = (
      'MaxDuration',
      'LLMJudge',
      'HasMatchingSpan',
-     'Python',
      'OutputConfig',
  )

@@ -268,22 +267,6 @@ class HasMatchingSpan(Evaluator[object, object, object]):
          return ctx.span_tree.any(self.query)


- # TODO: Consider moving this to docs rather than providing it with the library, given the security implications
- @dataclass(repr=False)
- class Python(Evaluator[object, object, object]):
-     """The output of this evaluator is the result of evaluating the provided Python expression.
-
-     ***WARNING***: this evaluator runs arbitrary Python code, so you should ***NEVER*** use it with untrusted inputs.
-     """
-
-     expression: str
-     evaluation_name: str | None = field(default=None)
-
-     def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
-         # Evaluate the condition, exposing access to the evaluator context as `ctx`.
-         return eval(self.expression, {'ctx': ctx})
-
-
  DEFAULT_EVALUATORS: tuple[type[Evaluator[object, object, object]], ...] = (
      Equals,
      EqualsExpected,

@@ -292,5 +275,12 @@ DEFAULT_EVALUATORS: tuple[type[Evaluator[object, object, object]], ...] = (
      MaxDuration,
      LLMJudge,
      HasMatchingSpan,
-     # Python, # not included by default for security reasons
  )
+
+
+ def __getattr__(name: str):
+     if name == 'Python':
+         raise ImportError(
+             'The `Python` evaluator has been removed for security reasons. See https://github.com/pydantic/pydantic-ai/pull/2808 for more details and a workaround.'
+         )
+     raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
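
For anyone depending on the removed evaluator, the linked PR points to a workaround: define the class locally in code you control. A sketch adapted directly from the deleted code above (the `LocalPython` name and the exact import paths are my choice; only ever use it with expressions you fully trust, since `eval()` runs arbitrary code):

```python
from dataclasses import dataclass, field

from pydantic_evals.evaluators import Evaluator, EvaluatorContext
from pydantic_evals.evaluators.evaluator import EvaluatorOutput


@dataclass(repr=False)
class LocalPython(Evaluator[object, object, object]):
    """Evaluates a trusted Python expression, mirroring the removed `Python` evaluator."""

    expression: str
    evaluation_name: str | None = field(default=None)

    def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
        # Exposes the evaluator context as `ctx`, exactly as the deleted class did.
        return eval(self.expression, {'ctx': ctx})
```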
pydantic_evals/otel/_context_in_memory_span_exporter.py

@@ -18,7 +18,7 @@ try:
      )

      _LOGFIRE_IS_INSTALLED = True
- except ImportError:  # pragma: no cover
+ except ImportError:  # pragma: lax no cover
      _LOGFIRE_IS_INSTALLED = False  # pyright: ignore[reportConstantRedefinition]

  # Ensure that we can do an isinstance check without erroring
pyproject.toml

@@ -24,7 +24,7 @@ authors = [
  license = "MIT"
  readme = "README.md"
  classifiers = [
-     "Development Status :: 4 - Beta",
+     "Development Status :: 5 - Production/Stable",
      "Programming Language :: Python",
      "Programming Language :: Python :: 3",
      "Programming Language :: Python :: 3 :: Only",

@@ -52,7 +52,6 @@ dependencies = [
      "pydantic>=2.10",
      "pydantic-ai-slim=={{ version }}",
      "anyio>=0",
-     "eval-type-backport>=0; python_version < '3.11'",
      "pyyaml>=6.0.2",
  ]