PyPI - azure-ai-evaluation - Versions diffs - 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

azure/ai/evaluation/__init__.py CHANGED Viewed

@@ -12,10 +12,19 @@ from ._evaluators._content_safety import (
     SexualEvaluator,
     ViolenceEvaluator,
 )
+from ._evaluators._multimodal._content_safety_multimodal import (
+    ContentSafetyMultimodalEvaluator,
+    HateUnfairnessMultimodalEvaluator,
+    SelfHarmMultimodalEvaluator,
+    SexualMultimodalEvaluator,
+    ViolenceMultimodalEvaluator,
+)
+from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
 from ._evaluators._f1_score import F1ScoreEvaluator
 from ._evaluators._fluency import FluencyEvaluator
 from ._evaluators._gleu import GleuScoreEvaluator
 from ._evaluators._groundedness import GroundednessEvaluator
+from ._evaluators._service_groundedness import GroundednessProEvaluator
 from ._evaluators._meteor import MeteorScoreEvaluator
 from ._evaluators._protected_material import ProtectedMaterialEvaluator
 from ._evaluators._qa import QAEvaluator
@@ -27,7 +36,10 @@ from ._evaluators._xpia import IndirectAttackEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
+    Conversation,
+    EvaluationResult,
     EvaluatorConfig,
+    Message,
     OpenAIModelConfiguration,
 )
@@ -37,6 +49,7 @@ __all__ = [
     "F1ScoreEvaluator",
     "FluencyEvaluator",
     "GroundednessEvaluator",
+    "GroundednessProEvaluator",
     "RelevanceEvaluator",
     "SimilarityEvaluator",
     "QAEvaluator",
@@ -57,4 +70,13 @@ __all__ = [
     "AzureOpenAIModelConfiguration",
     "OpenAIModelConfiguration",
     "EvaluatorConfig",
+    "Conversation",
+    "Message",
+    "EvaluationResult",
+    "ContentSafetyMultimodalEvaluator",
+    "HateUnfairnessMultimodalEvaluator",
+    "SelfHarmMultimodalEvaluator",
+    "SexualMultimodalEvaluator",
+    "ViolenceMultimodalEvaluator",
+    "ProtectedMaterialMultimodalEvaluator",
 ]

azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py RENAMED Viewed

@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+import os
 import functools
 import inspect
 import logging
@@ -149,6 +150,9 @@ def _get_indentation_size(doc_string: str) -> int:
 def _should_skip_warning():
     skip_warning_msg = False
+    if os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true":
+        skip_warning_msg = True
     # Cases where we want to suppress the warning:
     # 1. When converting from REST object to SDK object
     for frame in inspect.stack():

azure/ai/evaluation/_common/constants.py CHANGED Viewed

@@ -6,6 +6,9 @@ from enum import Enum
 from azure.core import CaseInsensitiveEnumMeta
+PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency"]
 class CommonConstants:
     """Define common constants."""
@@ -35,6 +38,7 @@ class Tasks:
     CONTENT_HARM = "content harm"
     PROTECTED_MATERIAL = "protected material"
     XPIA = "xpia"
+    GROUNDEDNESS = "groundedness"
 class _InternalAnnotationTasks:
@@ -56,6 +60,7 @@ class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     SEXUAL = "sexual"
     PROTECTED_MATERIAL = "protected_material"
     XPIA = "xpia"
+    GROUNDEDNESS = "generic_groundedness"
 class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):

azure/ai/evaluation/_common/math.py CHANGED Viewed

@@ -3,16 +3,87 @@
 # ---------------------------------------------------------
 import math
-from typing import List
+from typing import List, Callable, Any
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 def list_sum(lst: List[float]) -> float:
+    """Given a list of floats, return the sum of the values.
+    :param lst: A list of floats.
+    :type lst: List[float]
+    :return: The sum of the values in the list.
+    :rtype: float
+    """
     return sum(lst)
 def list_mean(lst: List[float]) -> float:
+    """Given a list of floats, calculate the mean of the values.
+    :param lst: A list of floats.
+    :type lst: List[float]
+    :return: The mean of the values in the list.
+    :rtype: float
+    """
     return list_sum(lst) / len(lst)
 def list_mean_nan_safe(lst: List[float]) -> float:
-    return list_mean([l for l in lst if not math.isnan(l)])
+    """Given a list of floats, remove all nan or None values, then calculate the mean of the remaining values.
+    :param lst: A list of floats.
+    :type lst: List[float]
+    :return: The mean of the values in the list.
+    :rtype: float
+    """
+    msg = "All score values are NaN. The mean cannot be calculated."
+    if all(math.isnan(l) for l in lst):
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            blame=ErrorBlame.USER_ERROR,
+            category=ErrorCategory.INVALID_VALUE,
+            target=ErrorTarget.CONVERSATION,
+        )
+    return list_mean([l for l in lst if not is_none_or_nan(l)])
+def apply_transform_nan_safe(lst: List[float], transform_fn: Callable[[float], Any]) -> List[Any]:
+    """Given a list of floats, remove all nan values, then apply the inputted transform function
+    to the remaining values, and return the resulting list of outputted values.
+    :param lst: A list of floats.
+    :type lst: List[float]
+    :param transform_fn: A function that produces something when applied to a float.
+    :type transform_fn: Callable[[float], Any]
+    :return: A list of the transformed values.
+    :rtype: List[Any]
+    """
+    msg = "All score values are NaN. The mean cannot be calculated."
+    if all(math.isnan(l) for l in lst):
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            blame=ErrorBlame.USER_ERROR,
+            category=ErrorCategory.INVALID_VALUE,
+            target=ErrorTarget.CONVERSATION,
+        )
+    return [transform_fn(l) for l in lst if not is_none_or_nan(l)]
+def is_none_or_nan(val: float) -> bool:
+    """math.isnan raises an error if None is inputted. This is a more robust wrapper.
+    :param val: The value to check.
+    :type val: float
+    :return: Whether the value is None or NaN.
+    :rtype: bool
+    """
+    return val is None or math.isnan(val)

azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl

azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl