PyPI - azure-ai-evaluation - Versions diffs - 1.0.0b3__py3-none-any.whl → 1.0.0b5__py3-none-any.whl - Mend

azure-ai-evaluation 1.0.0b3__py3-none-any.whl → 1.0.0b5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (93) hide show

azure/ai/evaluation/__init__.py CHANGED Viewed

@@ -12,10 +12,19 @@ from ._evaluators._content_safety import (
     SexualEvaluator,
     ViolenceEvaluator,
 )
+from ._evaluators._multimodal._content_safety_multimodal import (
+    ContentSafetyMultimodalEvaluator,
+    HateUnfairnessMultimodalEvaluator,
+    SelfHarmMultimodalEvaluator,
+    SexualMultimodalEvaluator,
+    ViolenceMultimodalEvaluator,
+)
+from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
 from ._evaluators._f1_score import F1ScoreEvaluator
 from ._evaluators._fluency import FluencyEvaluator
 from ._evaluators._gleu import GleuScoreEvaluator
 from ._evaluators._groundedness import GroundednessEvaluator
+from ._evaluators._service_groundedness import GroundednessProEvaluator
 from ._evaluators._meteor import MeteorScoreEvaluator
 from ._evaluators._protected_material import ProtectedMaterialEvaluator
 from ._evaluators._qa import QAEvaluator
@@ -27,8 +36,11 @@ from ._evaluators._xpia import IndirectAttackEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
-    OpenAIModelConfiguration,
+    Conversation,
+    EvaluationResult,
     EvaluatorConfig,
+    Message,
+    OpenAIModelConfiguration,
 )
 __all__ = [
@@ -37,6 +49,7 @@ __all__ = [
     "F1ScoreEvaluator",
     "FluencyEvaluator",
     "GroundednessEvaluator",
+    "GroundednessProEvaluator",
     "RelevanceEvaluator",
     "SimilarityEvaluator",
     "QAEvaluator",
@@ -57,4 +70,13 @@ __all__ = [
     "AzureOpenAIModelConfiguration",
     "OpenAIModelConfiguration",
     "EvaluatorConfig",
+    "Conversation",
+    "Message",
+    "EvaluationResult",
+    "ContentSafetyMultimodalEvaluator",
+    "HateUnfairnessMultimodalEvaluator",
+    "SelfHarmMultimodalEvaluator",
+    "SexualMultimodalEvaluator",
+    "ViolenceMultimodalEvaluator",
+    "ProtectedMaterialMultimodalEvaluator",
 ]

azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py RENAMED Viewed

@@ -6,9 +6,9 @@ import functools
 import inspect
 import logging
 import sys
-from typing import Callable, Type, TypeVar, Union
+from typing import Callable, Type, TypeVar, Union, overload
-from typing_extensions import ParamSpec
+from typing_extensions import ParamSpec, TypeGuard
 DOCSTRING_TEMPLATE = ".. note::    {0} {1}\n\n"
 DOCSTRING_DEFAULT_INDENTATION = 8
@@ -22,20 +22,31 @@ EXPERIMENTAL_LINK_MESSAGE = (
 _warning_cache = set()
 module_logger = logging.getLogger(__name__)
-TExperimental = TypeVar("TExperimental", bound=Union[Type, Callable])
 P = ParamSpec("P")
 T = TypeVar("T")
-def experimental(wrapped: TExperimental) -> TExperimental:
+@overload
+def experimental(wrapped: Type[T]) -> Type[T]: ...
+@overload
+def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ...
+def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]:
     """Add experimental tag to a class or a method.
     :param wrapped: Either a Class or Function to mark as experimental
-    :type wrapped: TExperimental
+    :type wrapped: Union[Type[T], Callable[P, T]]
     :return: The wrapped class or method
-    :rtype: TExperimental
+    :rtype: Union[Type[T], Callable[P, T]]
     """
-    if inspect.isclass(wrapped):
+    def is_class(t: Union[Type[T], Callable[P, T]]) -> TypeGuard[Type[T]]:
+        return isinstance(t, type)
+    if is_class(wrapped):
         return _add_class_docstring(wrapped)
     if inspect.isfunction(wrapped):
         return _add_method_docstring(wrapped)
@@ -74,11 +85,11 @@ def _add_class_docstring(cls: Type[T]) -> Type[T]:
         cls.__doc__ = _add_note_to_docstring(cls.__doc__, doc_string)
     else:
         cls.__doc__ = doc_string + ">"
-    cls.__init__ = _add_class_warning(cls.__init__)
+    cls.__init__ = _add_class_warning(cls.__init__)  # type: ignore[method-assign]
     return cls
-def _add_method_docstring(func: Callable[P, T] = None) -> Callable[P, T]:
+def _add_method_docstring(func: Callable[P, T]) -> Callable[P, T]:
     """Add experimental tag to the method doc string.
     :param func: The function to update

azure/ai/evaluation/_common/constants.py CHANGED Viewed

@@ -3,6 +3,11 @@
 # ---------------------------------------------------------
 from enum import Enum
+from azure.core import CaseInsensitiveEnumMeta
+PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency"]
 class CommonConstants:
     """Define common constants."""
@@ -33,6 +38,7 @@ class Tasks:
     CONTENT_HARM = "content harm"
     PROTECTED_MATERIAL = "protected material"
     XPIA = "xpia"
+    GROUNDEDNESS = "groundedness"
 class _InternalAnnotationTasks:
@@ -43,7 +49,7 @@ class _InternalAnnotationTasks:
     ECI = "eci"
-class EvaluationMetrics:
+class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Evaluation metrics to aid the RAI service in determining what
     metrics to request, and how to present them back to the user."""
@@ -54,9 +60,10 @@ class EvaluationMetrics:
     SEXUAL = "sexual"
     PROTECTED_MATERIAL = "protected_material"
     XPIA = "xpia"
+    GROUNDEDNESS = "generic_groundedness"
-class _InternalEvaluationMetrics:
+class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Evaluation metrics that are not publicly supported.
     These metrics are experimental and subject to potential change or migration to the main
     enum over time.

azure/ai/evaluation/_common/math.py ADDED Viewed

@@ -0,0 +1,29 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import math
+from typing import List
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+def list_sum(lst: List[float]) -> float:
+    return sum(lst)
+def list_mean(lst: List[float]) -> float:
+    return list_sum(lst) / len(lst)
+def list_mean_nan_safe(lst: List[float]) -> float:
+    msg = "All score values are NaN. The mean cannot be calculated."
+    if all(math.isnan(l) for l in lst):
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            blame=ErrorBlame.USER_ERROR,
+            category=ErrorCategory.INVALID_VALUE,
+            target=ErrorTarget.CONVERSATION,
+        )
+    return list_mean([l for l in lst if not math.isnan(l)])

azure-ai-evaluation 1.0.0b3__py3-none-any.whl → 1.0.0b5__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.0.0b3__py3-none-any.whl → 1.0.0b5__py3-none-any.whl