pydantic-evals 1.0.0b1__tar.gz → 1.0.2__tar.gz

This diff shows the changes between publicly released package versions as they appear in their public registries, and is provided for informational purposes only.

Potentially problematic release: this version of pydantic-evals might be problematic.

Files changed (24)
  1. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/PKG-INFO +3 -4
  2. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/_utils.py +34 -2
  3. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/dataset.py +4 -4
  4. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/__init__.py +8 -2
  5. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/_run_evaluator.py +3 -5
  6. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/common.py +8 -18
  7. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/_context_in_memory_span_exporter.py +1 -1
  8. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pyproject.toml +1 -2
  9. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/.gitignore +0 -0
  10. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/LICENSE +0 -0
  11. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/README.md +0 -0
  12. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/__init__.py +0 -0
  13. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/context.py +0 -0
  14. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/evaluator.py +0 -0
  15. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/llm_as_a_judge.py +0 -0
  16. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/evaluators/spec.py +0 -0
  17. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/generation.py +0 -0
  18. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/__init__.py +0 -0
  19. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/_context_subtree.py +0 -0
  20. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/_errors.py +0 -0
  21. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/otel/span_tree.py +0 -0
  22. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/py.typed +0 -0
  23. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/reporting/__init__.py +0 -0
  24. {pydantic_evals-1.0.0b1 → pydantic_evals-1.0.2}/pydantic_evals/reporting/render_numbers.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydantic-evals
- Version: 1.0.0b1
+ Version: 1.0.2
  Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
  Project-URL: Homepage, https://ai.pydantic.dev/evals
  Project-URL: Source, https://github.com/pydantic/pydantic-ai

@@ -9,7 +9,7 @@ Project-URL: Changelog, https://github.com/pydantic/pydantic-ai/releases
  Author-email: Samuel Colvin <samuel@pydantic.dev>, Marcelo Trylesinski <marcelotryle@gmail.com>, David Montague <david@pydantic.dev>, Alex Hall <alex@pydantic.dev>, Douwe Maan <douwe@pydantic.dev>
  License-Expression: MIT
  License-File: LICENSE
- Classifier: Development Status :: 4 - Beta
+ Classifier: Development Status :: 5 - Production/Stable
  Classifier: Environment :: Console
  Classifier: Environment :: MacOS X
  Classifier: Intended Audience :: Developers

@@ -29,9 +29,8 @@ Classifier: Topic :: Internet
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.10
  Requires-Dist: anyio>=0
- Requires-Dist: eval-type-backport>=0; python_version < '3.11'
  Requires-Dist: logfire-api>=3.14.1
- Requires-Dist: pydantic-ai-slim==1.0.0b1
+ Requires-Dist: pydantic-ai-slim==1.0.2
  Requires-Dist: pydantic>=2.10
  Requires-Dist: pyyaml>=6.0.2
  Requires-Dist: rich>=13.9.4
pydantic_evals/_utils.py

@@ -2,13 +2,20 @@ from __future__ import annotations as _annotations

  import asyncio
  import inspect
- from collections.abc import Awaitable, Callable, Sequence
+ import warnings
+ from collections.abc import Awaitable, Callable, Generator, Sequence
+ from contextlib import contextmanager
  from functools import partial
- from typing import Any, TypeVar
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any, TypeVar

  import anyio
+ import logfire_api
  from typing_extensions import ParamSpec, TypeIs

+ _logfire = logfire_api.Logfire(otel_scope='pydantic-evals')
+ logfire_api.add_non_user_code_prefix(Path(__file__).parent.absolute())
+

  class Unset:
      """A singleton to represent an unset value.

@@ -101,3 +108,28 @@ async def task_group_gather(tasks: Sequence[Callable[[], Awaitable[T]]]) -> list
              tg.start_soon(_run_task, task, i)

      return results
+
+
+ try:
+     from logfire._internal.config import (
+         LogfireNotConfiguredWarning,  # pyright: ignore[reportAssignmentType,reportPrivateImportUsage]
+     )
+ # TODO: Remove this `pragma: no cover` once we test evals without pydantic-ai (which includes logfire)
+ except ImportError:  # pragma: no cover
+
+     class LogfireNotConfiguredWarning(UserWarning):
+         pass
+
+
+ if TYPE_CHECKING:
+     logfire_span = _logfire.span
+ else:
+
+     @contextmanager
+     def logfire_span(*args: Any, **kwargs: Any) -> Generator[logfire_api.LogfireSpan, None, None]:
+         """Create a Logfire span without warning if logfire is not configured."""
+         # TODO: Remove once Logfire has the ability to suppress this warning from non-user code
+         with warnings.catch_warnings():
+             warnings.filterwarnings('ignore', category=LogfireNotConfiguredWarning)
+             with _logfire.span(*args, **kwargs) as span:
+                 yield span
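
The `logfire_span` wrapper relies on plain stdlib `warnings` filtering scoped by `catch_warnings()`. A minimal self-contained sketch of the same pattern, where `DemoWarning` and `quiet_block` are hypothetical stand-ins for `LogfireNotConfiguredWarning` and the wrapper:

```python
import warnings
from contextlib import contextmanager


class DemoWarning(UserWarning):
    """Hypothetical stand-in for LogfireNotConfiguredWarning."""


@contextmanager
def quiet_block():
    # catch_warnings() snapshots the global warning-filter state and restores
    # it on exit, so the 'ignore' filter only applies inside this context.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DemoWarning)
        yield


with quiet_block():
    warnings.warn('suppressed', DemoWarning)  # silenced by the scoped filter
warnings.warn('still visible', DemoWarning)   # emitted normally
```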
pydantic_evals/dataset.py

@@ -36,7 +36,7 @@ from typing_extensions import NotRequired, Self, TypedDict, TypeVar

  from pydantic_evals._utils import get_event_loop

- from ._utils import get_unwrapped_function_name, task_group_gather
+ from ._utils import get_unwrapped_function_name, logfire_span, task_group_gather
  from .evaluators import EvaluationResult, Evaluator
  from .evaluators._run_evaluator import run_evaluator
  from .evaluators.common import DEFAULT_EVALUATORS

@@ -283,7 +283,7 @@ class Dataset(BaseModel, Generic[InputsT, OutputT, MetadataT], extra='forbid', a
          limiter = anyio.Semaphore(max_concurrency) if max_concurrency is not None else AsyncExitStack()

          with (
-             _logfire.span('evaluate {name}', name=name, n_cases=len(self.cases)) as eval_span,
+             logfire_span('evaluate {name}', name=name, n_cases=len(self.cases)) as eval_span,
              progress_bar or nullcontext(),
          ):
              task_id = progress_bar.add_task(f'Evaluating {name}', total=total_cases) if progress_bar else None

@@ -858,7 +858,7 @@ async def _run_task(
      token = _CURRENT_TASK_RUN.set(task_run_)
      try:
          with (
-             _logfire.span('execute {task}', task=get_unwrapped_function_name(task)) as task_span,
+             logfire_span('execute {task}', task=get_unwrapped_function_name(task)) as task_span,
              context_subtree() as span_tree_,
          ):
              t0 = time.perf_counter()

@@ -933,7 +933,7 @@ async def _run_task_and_evaluators(
      trace_id: str | None = None
      span_id: str | None = None
      try:
-         with _logfire.span(
+         with logfire_span(
              'case: {case_name}',
              task_name=get_unwrapped_function_name(task),
              case_name=report_case_name,
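
The practical effect for users: evaluation runs no longer emit `LogfireNotConfiguredWarning` for the library's own `evaluate`, `case`, and `execute` spans when logfire is installed but not configured. A hedged usage sketch (the `Case`/`Dataset`/`evaluate_sync` names follow the published pydantic-evals API; treat the details as illustrative):

```python
from pydantic_evals import Case, Dataset

dataset = Dataset(cases=[Case(name='square', inputs=3, expected_output=9)])


async def task(n: int) -> int:
    return n * n


# Before this change, running without logfire.configure() could surface
# LogfireNotConfiguredWarning from the library's internal spans; now the
# logfire_span wrapper suppresses it and the spans are simply discarded.
report = dataset.evaluate_sync(task)
report.print()
```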
pydantic_evals/evaluators/__init__.py

@@ -7,7 +7,6 @@ from .common import (
      LLMJudge,
      MaxDuration,
      OutputConfig,
-     Python,
  )
  from .context import EvaluatorContext
  from .evaluator import EvaluationReason, EvaluationResult, Evaluator, EvaluatorFailure, EvaluatorOutput, EvaluatorSpec

@@ -22,7 +21,6 @@ __all__ = (
      'LLMJudge',
      'HasMatchingSpan',
      'OutputConfig',
-     'Python',
      # context
      'EvaluatorContext',
      # evaluator

@@ -34,3 +32,11 @@ __all__ = (
      'EvaluationReason',
      'EvaluationResult',
  )
+
+
+ def __getattr__(name: str):
+     if name == 'Python':
+         raise ImportError(
+             'The `Python` evaluator has been removed for security reasons. See https://github.com/pydantic/pydantic-ai/pull/2808 for more details and a workaround.'
+         )
+     raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
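
The redirect uses PEP 562 module-level `__getattr__`, which Python (3.7+) calls only when normal module attribute lookup fails, so both `from pydantic_evals.evaluators import Python` and `evaluators.Python` now fail with the migration message instead of a bare `AttributeError`. A minimal sketch of the same pattern in a hypothetical module:

```python
# removed_names.py -- hypothetical module demonstrating the PEP 562 hook
_REMOVED = {
    'Python': 'removed for security reasons; see pydantic-ai PR #2808 for a workaround',
}


def __getattr__(name: str):
    # Only reached when `name` is not an actual attribute of this module.
    if name in _REMOVED:
        raise ImportError(f'`{name}` has been {_REMOVED[name]}')
    raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
```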
pydantic_evals/evaluators/_run_evaluator.py

@@ -2,16 +2,16 @@ from __future__ import annotations

  import traceback
  from collections.abc import Mapping
- from pathlib import Path
  from typing import TYPE_CHECKING, Any

- import logfire_api
  from pydantic import (
      TypeAdapter,
      ValidationError,
  )
  from typing_extensions import TypeVar

+ from pydantic_evals._utils import logfire_span
+
  from .context import EvaluatorContext
  from .evaluator import (
      EvaluationReason,

@@ -25,8 +25,6 @@ from .evaluator import (
  if TYPE_CHECKING:
      from pydantic_ai.retries import RetryConfig

- _logfire = logfire_api.Logfire(otel_scope='pydantic-evals')
- logfire_api.add_non_user_code_prefix(Path(__file__).parent.absolute())

  InputsT = TypeVar('InputsT', default=Any, contravariant=True)
  OutputT = TypeVar('OutputT', default=Any, contravariant=True)

@@ -62,7 +60,7 @@ async def run_evaluator(
          evaluate = tenacity_retry(**retry)(evaluate)

      try:
-         with _logfire.span(
+         with logfire_span(
              'evaluator: {evaluator_name}',
              evaluator_name=evaluator.get_default_evaluation_name(),
          ):
pydantic_evals/evaluators/common.py

@@ -21,7 +21,6 @@ __all__ = (
      'MaxDuration',
      'LLMJudge',
      'HasMatchingSpan',
-     'Python',
      'OutputConfig',
  )

@@ -268,22 +267,6 @@ class HasMatchingSpan(Evaluator[object, object, object]):
          return ctx.span_tree.any(self.query)


- # TODO: Consider moving this to docs rather than providing it with the library, given the security implications
- @dataclass(repr=False)
- class Python(Evaluator[object, object, object]):
-     """The output of this evaluator is the result of evaluating the provided Python expression.
-
-     ***WARNING***: this evaluator runs arbitrary Python code, so you should ***NEVER*** use it with untrusted inputs.
-     """
-
-     expression: str
-     evaluation_name: str | None = field(default=None)
-
-     def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
-         # Evaluate the condition, exposing access to the evaluator context as `ctx`.
-         return eval(self.expression, {'ctx': ctx})
-
-
  DEFAULT_EVALUATORS: tuple[type[Evaluator[object, object, object]], ...] = (
      Equals,
      EqualsExpected,

@@ -292,5 +275,12 @@ DEFAULT_EVALUATORS: tuple[type[Evaluator[object, object, object]], ...] = (
      MaxDuration,
      LLMJudge,
      HasMatchingSpan,
-     # Python, # not included by default for security reasons
  )
+
+
+ def __getattr__(name: str):
+     if name == 'Python':
+         raise ImportError(
+             'The `Python` evaluator has been removed for security reasons. See https://github.com/pydantic/pydantic-ai/pull/2808 for more details and a workaround.'
+         )
+     raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
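
For anyone depending on the removed evaluator, the linked PR points to a workaround: define the class locally in code you control. A sketch adapted directly from the deleted code above (the `LocalPython` name and the exact import paths are my choice; only ever use it with expressions you fully trust, since `eval()` runs arbitrary code):

```python
from dataclasses import dataclass, field

from pydantic_evals.evaluators import Evaluator, EvaluatorContext
from pydantic_evals.evaluators.evaluator import EvaluatorOutput


@dataclass(repr=False)
class LocalPython(Evaluator[object, object, object]):
    """Evaluates a trusted Python expression, mirroring the removed `Python` evaluator."""

    expression: str
    evaluation_name: str | None = field(default=None)

    def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
        # Exposes the evaluator context as `ctx`, exactly as the deleted class did.
        return eval(self.expression, {'ctx': ctx})
```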
pydantic_evals/otel/_context_in_memory_span_exporter.py

@@ -18,7 +18,7 @@ try:
      )

      _LOGFIRE_IS_INSTALLED = True
- except ImportError:  # pragma: no cover
+ except ImportError:  # pragma: lax no cover
      _LOGFIRE_IS_INSTALLED = False  # pyright: ignore[reportConstantRedefinition]

  # Ensure that we can do an isinstance check without erroring
pyproject.toml

@@ -24,7 +24,7 @@ authors = [
  license = "MIT"
  readme = "README.md"
  classifiers = [
-     "Development Status :: 4 - Beta",
+     "Development Status :: 5 - Production/Stable",
      "Programming Language :: Python",
      "Programming Language :: Python :: 3",
      "Programming Language :: Python :: 3 :: Only",

@@ -52,7 +52,6 @@ dependencies = [
      "pydantic>=2.10",
      "pydantic-ai-slim=={{ version }}",
      "anyio>=0",
-     "eval-type-backport>=0; python_version < '3.11'",
      "pyyaml>=6.0.2",
  ]