PyPI - azure-ai-evaluation - Versions diffs - 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl - Mend

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show

azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py CHANGED Viewed

@@ -1,13 +1,11 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from concurrent.futures import as_completed
-from typing import Callable, Dict, List, Union
+from typing import Dict, List, Union
-from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
 from typing_extensions import overload, override
-from azure.ai.evaluation._evaluators._common import EvaluatorBase
+from azure.ai.evaluation._evaluators._common import MultiEvaluatorBase
 from azure.ai.evaluation._model_configurations import Conversation
 from azure.ai.evaluation._common._experimental import experimental
@@ -18,15 +16,25 @@ from ._violence import ViolenceEvaluator
 @experimental
-class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
+class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
     """
-    Initialize a content safety evaluator configured to evaluate content safetry metrics for QA scenario.
+    Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param violence_threshold: The threshold for violence evaluation. Default is 3.
+    :type violence_threshold: int
+    :param sexual_threshold: The threshold for sexual content evaluation. Default is 3.
+    :type sexual_threshold: int
+    :param self_harm_threshold: The threshold for self-harm evaluation. Default is 3.
+    :type self_harm_threshold: int
+    :param hate_unfairness_threshold: The threshold for hate/unfairness evaluation. Default is 3.
+    :type hate_unfairness_threshold: int
+    :param evaluate_query: Whether to also evaluate the query in addition to the response. Default is False.
+    :type evaluate_query: bool
     :param kwargs: Additional arguments to pass to the evaluator.
     :type kwargs: Any
     :return: A function that evaluates content-safety metrics for "question-answering" scenario.
@@ -38,22 +46,67 @@ class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
             :end-before: [END content_safety_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a ContentSafetyEvaluator.
+            :caption: Initialize and call ContentSafetyEvaluator using azure.ai.evaluation.AzureAIProject.
+    .. admonition:: Example using Azure AI Project URL:
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START content_safety_evaluator]
+            :end-before: [END content_safety_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call ContentSafetyEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
+    .. admonition:: Example with Threshold:
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_content_safety_evaluator]
+            :end-before: [END threshold_content_safety_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with threshold and call a ContentSafetyEvaluator with a query and response.
     """
-    id = "content_safety"
+    id = "azureai://built-in/evaluators/content_safety"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
-    # TODO address 3579092 to re-enabled parallel evals.
-    def __init__(self, credential, azure_ai_project, **kwargs):
-        super().__init__()
-        self._parallel = kwargs.pop("_parallel", False)
-        self._evaluators: List[Callable[..., Dict[str, Union[str, float]]]] = [
-            ViolenceEvaluator(credential, azure_ai_project),
-            SexualEvaluator(credential, azure_ai_project),
-            SelfHarmEvaluator(credential, azure_ai_project),
-            HateUnfairnessEvaluator(credential, azure_ai_project),
+    def __init__(
+        self,
+        credential,
+        azure_ai_project,
+        *,
+        violence_threshold: int = 3,
+        sexual_threshold: int = 3,
+        self_harm_threshold: int = 3,
+        hate_unfairness_threshold: int = 3,
+        **kwargs,
+    ):
+        # Type checking
+        for name, value in [
+            ("violence_threshold", violence_threshold),
+            ("sexual_threshold", sexual_threshold),
+            ("self_harm_threshold", self_harm_threshold),
+            ("hate_unfairness_threshold", hate_unfairness_threshold),
+        ]:
+            if not isinstance(value, int):
+                raise TypeError(f"{name} must be an int, got {type(value)}")
+        # Extract evaluate_query from kwargs if present
+        evaluate_query_kwargs = {}
+        if "evaluate_query" in kwargs:
+            evaluate_query_kwargs["evaluate_query"] = kwargs["evaluate_query"]
+        evaluators = [
+            ViolenceEvaluator(credential, azure_ai_project, threshold=violence_threshold, **evaluate_query_kwargs),
+            SexualEvaluator(credential, azure_ai_project, threshold=sexual_threshold, **evaluate_query_kwargs),
+            SelfHarmEvaluator(credential, azure_ai_project, threshold=self_harm_threshold, **evaluate_query_kwargs),
+            HateUnfairnessEvaluator(
+                credential, azure_ai_project, threshold=hate_unfairness_threshold, **evaluate_query_kwargs
+            ),
         ]
+        super().__init__(evaluators=evaluators, **kwargs)
     @overload
     def __call__(
@@ -109,36 +162,3 @@ class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
         :rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[float, Dict[str, List[Union[str, float]]]]]]
         """
         return super().__call__(*args, **kwargs)
-    @override
-    async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:
-        """Perform the evaluation using the Azure AI RAI service.
-        The exact evaluation performed is determined by the evaluation metric supplied
-        by the child class initializer.
-        :param eval_input: The input to the evaluation function.
-        :type eval_input: Dict
-        :return: The evaluation result.
-        :rtype: Dict
-        """
-        query = eval_input.get("query", None)
-        response = eval_input.get("response", None)
-        conversation = eval_input.get("conversation", None)
-        results: Dict[str, Union[str, float]] = {}
-        # TODO fix this to not explode on empty optional inputs (PF SKD error)
-        if self._parallel:
-            with ThreadPoolExecutor() as executor:
-                # pylint: disable=no-value-for-parameter
-                futures = {
-                    executor.submit(query=query, response=response, conversation=conversation): evaluator
-                    for evaluator in self._evaluators
-                }
-                for future in as_completed(futures):
-                    results.update(future.result())
-        else:
-            for evaluator in self._evaluators:
-                result = evaluator(query=query, response=response, conversation=conversation)
-                results.update(result)
-        return results

azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py CHANGED Viewed

@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 from azure.ai.evaluation._model_configurations import Conversation
+from azure.ai.evaluation._constants import _AggregationType
 @experimental
@@ -44,9 +45,11 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param threshold: The threshold for the HateUnfairness evaluator. Default is 3.
+    :type threshold: int
     .. admonition:: Example:
@@ -55,22 +58,49 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             :end-before: [END hate_unfairness_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a HateUnfairnessEvaluator.
+            :caption: Initialize and call HateUnfairnessEvaluator using azure.ai.evaluation.AzureAIProject.
+    .. admonition:: Example using Azure AI Project URL:
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START hate_unfairness_evaluator]
+            :end-before: [END hate_unfairness_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call HateUnfairnessEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
+    .. admonition:: Example with Threshold:
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_hate_unfairness_evaluator]
+            :end-before: [END threshold_hate_unfairness_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with threshold and call a HateUnfairnessEvaluator with a query and response.
     """
-    id = "azureml://registries/azureml/models/Hate-and-Unfairness-Evaluator/versions/4"
+    id = "azureai://built-in/evaluators/hate_unfairness"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        *,
+        threshold: int = 3,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.HATE_FAIRNESS,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            conversation_aggregation_type=_AggregationType.MAX,
+            threshold=threshold,
+            _higher_is_better=False,
+            **kwargs,
         )
     @overload

azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py CHANGED Viewed

@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 from azure.ai.evaluation._model_configurations import Conversation
+from azure.ai.evaluation._constants import _AggregationType
 @experimental
@@ -38,9 +39,11 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param threshold: The threshold for the SelfHarm evaluator. Default is 3.
+    :type threshold: int
     .. admonition:: Example:
@@ -49,22 +52,40 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             :end-before: [END self_harm_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a SelfHarmEvaluator.
+            :caption: Initialize and call SelfHarmEvaluator using azure.ai.evaluation.AzureAIProject.
+    .. admonition:: Example using Azure AI Project URL:
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START self_harm_evaluator]
+            :end-before: [END self_harm_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call SelfHarmEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
     """
-    id = "azureml://registries/azureml/models/Self-Harm-Related-Content-Evaluator/versions/3"
+    id = "azureai://built-in/evaluators/self_harm"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        *,
+        threshold: int = 3,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.SELF_HARM,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            conversation_aggregation_type=_AggregationType.MAX,
+            threshold=threshold,
+            _higher_is_better=False,
+            **kwargs,
         )
     @overload

azure/ai/evaluation/_evaluators/_content_safety/_sexual.py CHANGED Viewed

@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 from azure.ai.evaluation._model_configurations import Conversation
+from azure.ai.evaluation._constants import _AggregationType
 @experimental
@@ -40,9 +41,11 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param threshold: The threshold for the Sexual evaluator. Default is 3.
+    :type threshold: int
     .. admonition:: Example:
@@ -52,21 +55,48 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             :language: python
             :dedent: 8
             :caption: Initialize and call a SexualEvaluator.
+    .. admonition:: Example using Azure AI Project URL:
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START sexual_evaluator]
+            :end-before: [END sexual_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call SexualEvaluator using Azure AI Project URL in following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
+    .. admonition:: Example with Threshold:
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_sexual_evaluator]
+            :end-before: [END threshold_sexual_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with threshold and call a SexualEvaluator.
     """
-    id = "azureml://registries/azureml/models/Sexual-Content-Evaluator/versions/3"
+    id = "azureai://built-in/evaluators/sexual"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        *,
+        threshold: int = 3,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.SEXUAL,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            conversation_aggregation_type=_AggregationType.MAX,
+            threshold=threshold,
+            _higher_is_better=False,
+            **kwargs,
         )
     @overload
@@ -119,7 +149,7 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             key "messages". Conversation turns are expected
             to be dictionaries with keys "content" and "role".
         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
-        :return: The fluency score.
+        :return: The sexual score.
         :rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[str, float, Dict[str, List[Union[str, float]]]]]]
         """
         return super().__call__(*args, **kwargs)

azure/ai/evaluation/_evaluators/_content_safety/_violence.py CHANGED Viewed

@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 from azure.ai.evaluation._model_configurations import Conversation
+from azure.ai.evaluation._constants import _AggregationType
 @experimental
@@ -40,9 +41,11 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param threshold: The threshold for the Violence evaluator. Default is 3.
+    :type threshold: int
     .. admonition:: Example:
@@ -52,21 +55,48 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             :language: python
             :dedent: 8
             :caption: Initialize and call a ViolenceEvaluator.
+    .. admonition:: Example using Azure AI Project URL:
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START violence_evaluator]
+            :end-before: [END violence_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
+    .. admonition:: Example:
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_violence_evaluator]
+            :end-before: [END threshold_violence_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with threshold and call a ViolenceEvaluator.
     """
-    id = "azureml://registries/azureml/models/Violent-Content-Evaluator/versions/3"
+    id = "azureai://built-in/evaluators/violence"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        *,
+        threshold: int = 3,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.VIOLENCE,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            conversation_aggregation_type=_AggregationType.MAX,
+            threshold=threshold,
+            _higher_is_better=False,
+            **kwargs,
         )
     @overload

azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from ._document_retrieval import DocumentRetrievalEvaluator, RetrievalGroundTruthDocument, RetrievedDocument
+__all__ = ["DocumentRetrievalEvaluator", "RetrievalGroundTruthDocument", "RetrievedDocument"]

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl