azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +51 -6
- azure/ai/evaluation/_aoai/__init__.py +1 -1
- azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
- azure/ai/evaluation/_aoai/label_grader.py +3 -2
- azure/ai/evaluation/_aoai/python_grader.py +84 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +91 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
- azure/ai/evaluation/_azure/_envs.py +9 -10
- azure/ai/evaluation/_azure/_token_manager.py +7 -1
- azure/ai/evaluation/_common/constants.py +11 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
- azure/ai/evaluation/_common/onedp/__init__.py +32 -32
- azure/ai/evaluation/_common/onedp/_client.py +136 -139
- azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
- azure/ai/evaluation/_common/onedp/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -50
- azure/ai/evaluation/_common/onedp/_version.py +9 -9
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
- azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
- azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
- azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/rai_service.py +88 -52
- azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
- azure/ai/evaluation/_common/utils.py +188 -10
- azure/ai/evaluation/_constants.py +2 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +9 -8
- azure/ai/evaluation/_converters/_models.py +46 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +2 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +73 -25
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +210 -94
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +132 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
- azure/ai/evaluation/_evaluate/_utils.py +25 -17
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +4 -4
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +20 -12
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +6 -6
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +45 -11
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +28 -18
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +11 -8
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +11 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +12 -9
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -7
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +37 -64
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +5 -5
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +4 -4
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +12 -8
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +31 -26
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -4
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +14 -7
- azure/ai/evaluation/_evaluators/_qa/_qa.py +5 -5
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +62 -15
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +21 -26
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +5 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +22 -22
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +7 -6
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +4 -4
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +27 -24
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +175 -183
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +99 -21
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +20 -12
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +10 -7
- azure/ai/evaluation/_exceptions.py +10 -0
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +117 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +33 -41
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +195 -111
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +3 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +1 -1
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
- azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
- azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
- azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
- azure/ai/evaluation/red_team/_default_converter.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +1947 -1040
- azure/ai/evaluation/red_team/_red_team_result.py +49 -38
- azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +39 -34
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
- azure/ai/evaluation/red_team/_utils/constants.py +1 -13
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
- azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +31 -4
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +31 -17
- azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +18 -6
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +30 -10
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +21 -8
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/METADATA +46 -3
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/RECORD +141 -136
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluators/_content_safety/_sexual.py

@@ -41,9 +41,9 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):

     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
     :param threshold: The threshold for the Sexual evaluator. Default is 3.
     :type threshold: int

@@ -55,17 +55,17 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
         :language: python
         :dedent: 8
         :caption: Initialize and call a SexualEvaluator.
-
+
     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START sexual_evaluator]
         :end-before: [END sexual_evaluator]
         :language: python
         :dedent: 8
-        :caption: Initialize and call SexualEvaluator using Azure AI Project URL in following format
+        :caption: Initialize and call SexualEvaluator using Azure AI Project URL in following format
         https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
-
+
     .. admonition:: Example with Threshold:

     .. literalinclude:: ../samples/evaluation_samples_threshold.py

@@ -76,8 +76,9 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
         :caption: Initialize with threshold and call a SexualEvaluator.
     """

-    id = "
+    id = "azureai://built-in/evaluators/sexual"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]

     @override
     def __init__(

@@ -86,6 +87,7 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
         azure_ai_project,
         *,
         threshold: int = 3,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.SEXUAL,

@@ -94,6 +96,7 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             conversation_aggregation_type=_AggregationType.MAX,
             threshold=threshold,
             _higher_is_better=False,
+            **kwargs,
         )

     @overload

@@ -146,7 +149,7 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             key "messages". Conversation turns are expected
             to be dictionaries with keys "content" and "role".
         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
-        :return: The
+        :return: The sexual score.
         :rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[str, float, Dict[str, List[Union[str, float]]]]]]
         """
         return super().__call__(*args, **kwargs)
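Note: the hunks above widen azure_ai_project to accept a project endpoint string in addition to an AzureAIProject instance, and thread **kwargs through to the base class. A minimal usage sketch under the new signature (the endpoint URL and inputs are placeholders; DefaultAzureCredential assumes azure-identity is installed); ViolenceEvaluator below changes the same way:

    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation import SexualEvaluator

    sexual_eval = SexualEvaluator(
        credential=DefaultAzureCredential(),
        # New in this release: a plain project endpoint string is accepted here.
        azure_ai_project="https://{resource_name}.services.ai.azure.com/api/projects/{project_name}",
        threshold=3,  # default, shown for clarity
    )
    # "query" is listed in _OPTIONAL_PARAMS, so response-only inputs may also be accepted.
    result = sexual_eval(query="What is the capital of France?", response="Paris.")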
azure/ai/evaluation/_evaluators/_content_safety/_violence.py

@@ -41,9 +41,9 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):

     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
     :param threshold: The threshold for the Violence evaluator. Default is 3.
     :type threshold: int

@@ -57,15 +57,15 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
         :caption: Initialize and call a ViolenceEvaluator.

     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START violence_evaluator]
         :end-before: [END violence_evaluator]
         :language: python
         :dedent: 8
-        :caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in following format
+        :caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in following format
         https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
-
+
     .. admonition:: Example:

     .. literalinclude:: ../samples/evaluation_samples_threshold.py

@@ -76,8 +76,9 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
         :caption: Initialize with threshold and call a ViolenceEvaluator.
     """

-    id = "
+    id = "azureai://built-in/evaluators/violence"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]

     @override
     def __init__(

@@ -86,6 +87,7 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
         azure_ai_project,
         *,
         threshold: int = 3,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.VIOLENCE,

@@ -94,6 +96,7 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
             conversation_aggregation_type=_AggregationType.MAX,
             threshold=threshold,
             _higher_is_better=False,
+            **kwargs,
         )

     @overload
azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py

@@ -4,8 +4,4 @@

 from ._document_retrieval import DocumentRetrievalEvaluator, RetrievalGroundTruthDocument, RetrievedDocument

-__all__ = [
-    "DocumentRetrievalEvaluator",
-    "RetrievalGroundTruthDocument",
-    "RetrievedDocument"
-]
+__all__ = ["DocumentRetrievalEvaluator", "RetrievalGroundTruthDocument", "RetrievedDocument"]
azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py

@@ -14,9 +14,7 @@ RetrievalGroundTruthDocument = TypedDict(
     "RetrievalGroundTruthDocument", {"document_id": str, "query_relevance_label": int}
 )

-RetrievedDocument = TypedDict(
-    "RetrievedDocument", {"document_id": str, "relevance_score": float}
-)
+RetrievedDocument = TypedDict("RetrievedDocument", {"document_id": str, "relevance_score": float})


 class DocumentRetrievalEvaluator(EvaluatorBase):

@@ -33,15 +31,15 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
         :caption: Initialize and call a DocumentRetrievalEvaluator

     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START document_retrieval_evaluator]
         :end-before: [END document_retrieval_evaluator]
         :language: python
         :dedent: 8
-        :caption: Initialize and call DocumentRetrievalEvaluator using Azure AI Project URL in following format
+        :caption: Initialize and call DocumentRetrievalEvaluator using Azure AI Project URL in following format
         https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
-
+
     .. admonition:: Example with Threshold:
     .. literalinclude:: ../samples/evaluation_samples_threshold.py
         :start-after: [START threshold_document_retrieval_evaluator]

@@ -51,6 +49,9 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
         :caption: Initialize with threshold and call a DocumentRetrievalEvaluator.
     """

+    id = "azureai://built-in/evaluators/document_retrieval"
+    """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+
     def __init__(
         self,
         *,

@@ -62,7 +63,7 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
         top1_relevance_threshold: Optional[float] = 50.0,
         top3_max_relevance_threshold: Optional[float] = 50.0,
         total_retrieved_documents_threshold: Optional[int] = 50,
-        total_ground_truth_documents_threshold: Optional[int] = 50
+        total_ground_truth_documents_threshold: Optional[int] = 50,
     ):
         super().__init__()
         self.k = 3

@@ -74,14 +75,10 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
         )

         if not isinstance(ground_truth_label_min, int):
-            raise EvaluationException(
-                "The ground truth label minimum must be an integer value."
-            )
+            raise EvaluationException("The ground truth label minimum must be an integer value.")

         if not isinstance(ground_truth_label_max, int):
-            raise EvaluationException(
-                "The ground truth label maximum must be an integer value."
-            )
+            raise EvaluationException("The ground truth label maximum must be an integer value.")

         self.ground_truth_label_min = ground_truth_label_min
         self.ground_truth_label_max = ground_truth_label_max

@@ -122,7 +119,7 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
     ) -> float:
         """NDCG (Normalized Discounted Cumulative Gain) calculated for the top K documents retrieved from a search query.
         NDCG measures how well a document ranking compares to an ideal document ranking given a list of ground-truth documents.
-
+
         :param result_docs_groundtruth_labels: A list of retrieved documents' ground truth labels.
         :type result_docs_groundtruth_labels: List[int]
         :param ideal_docs_groundtruth_labels: A list of ideal documents' ground truth labels.

@@ -145,7 +142,7 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
     def _compute_xdcg(self, result_docs_groundtruth_labels: List[int]) -> float:
         """XDCG calculated for the top K documents retrieved from a search query.
         XDCG measures how objectively good are the top K documents, discounted by their position in the list.
-
+
         :param result_docs_groundtruth_labels: A list of retrieved documents' ground truth labels.
         :type result_docs_groundtruth_labels: List[int]
         :return: The XDCG@K calculation result.

@@ -159,11 +156,7 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
             return math.pow(self.xdcg_discount_factor, rank - 1)

         ranks = list(range(1, self.k + 1))
-        xdcg_n = sum(
-            starmap(
-                calculate_xdcg_numerator, zip(result_docs_groundtruth_labels, ranks)
-            )
-        )
+        xdcg_n = sum(starmap(calculate_xdcg_numerator, zip(result_docs_groundtruth_labels, ranks)))
         xdcg_d = sum(map(calculate_xdcg_denominator, ranks))

         return xdcg_n / float(xdcg_d)
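Note: the reflowed _compute_xdcg keeps the same shape: a numerator summed over the top-K labels zipped with ranks, divided by the sum of the per-rank discounts. A standalone sketch of that aggregation with toy numbers; the numerator helper is not visible in this hunk, so label * discount(rank) and the 0.6 discount factor are assumptions for illustration only:

    from itertools import starmap

    K = 3
    DISCOUNT = 0.6  # assumed stand-in for self.xdcg_discount_factor

    def calculate_xdcg_denominator(rank: int) -> float:
        return DISCOUNT ** (rank - 1)  # mirrors the visible math.pow(...) line

    def calculate_xdcg_numerator(label: int, rank: int) -> float:
        return label * calculate_xdcg_denominator(rank)  # assumed form

    labels = [3, 1, 2]  # ground-truth labels of the top-3 retrieved documents
    ranks = list(range(1, K + 1))
    xdcg_n = sum(starmap(calculate_xdcg_numerator, zip(labels, ranks)))
    xdcg_d = sum(map(calculate_xdcg_denominator, ranks))
    print(xdcg_n / float(xdcg_d))  # (3.0 + 0.6 + 0.72) / (1.0 + 0.6 + 0.36)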
@@ -175,7 +168,7 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
     ) -> float:
         """Fidelity calculated over all documents retrieved from a search query.
         Fidelity measures how objectively good are all of the documents retrieved compared with all known good documents in the underlying data store.
-
+
         :param result_docs_groundtruth_labels: A list of retrieved documents' ground truth labels.
         :type result_docs_groundtruth_labels: List[int]
         :param ideal_docs_groundtruth_labels: A list of ideal documents' ground truth labels.

@@ -196,25 +189,16 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
                 if label >= s:
                     label_counts[str(label)] += 1

-            sorted_label_counts = [
-                x[1] for x in sorted(label_counts.items(), key=lambda x: x[0])
-            ]
+            sorted_label_counts = [x[1] for x in sorted(label_counts.items(), key=lambda x: x[0])]

             # calculate weights
-            weights = [
-                (math.pow(2, i + 1) - 1)
-                for i in range(s, self.ground_truth_label_max + 1)
-            ]
+            weights = [(math.pow(2, i + 1) - 1) for i in range(s, self.ground_truth_label_max + 1)]

             # return weighted sum
             return sum(starmap(operator.mul, zip(sorted_label_counts, weights)))

-        weighted_sum_by_rating_results = calculate_weighted_sum_by_rating(
-            result_docs_groundtruth_labels
-        )
-        weighted_sum_by_rating_index = calculate_weighted_sum_by_rating(
-            ideal_docs_groundtruth_labels
-        )
+        weighted_sum_by_rating_results = calculate_weighted_sum_by_rating(result_docs_groundtruth_labels)
+        weighted_sum_by_rating_index = calculate_weighted_sum_by_rating(ideal_docs_groundtruth_labels)

         if weighted_sum_by_rating_index == 0:
             return math.nan
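Note: the fidelity hunks compute, for the retrieved set and the ideal set alike, a weighted count of documents at each relevance label (weight 2^(label+1) - 1), guarding against a zero ideal sum. A self-contained sketch with toy data; s (the lowest label that counts) and the label range 1..3 are assumptions, and the final ratio is implied by the visible zero-sum guard rather than shown in this hunk:

    import math
    import operator
    from itertools import starmap

    def weighted_sum_by_rating(labels, s=1, label_max=3):
        counts = {str(i): 0 for i in range(s, label_max + 1)}  # initialization assumed
        for label in labels:
            if label >= s:
                counts[str(label)] += 1
        sorted_counts = [x[1] for x in sorted(counts.items(), key=lambda x: x[0])]
        weights = [(math.pow(2, i + 1) - 1) for i in range(s, label_max + 1)]  # 3, 7, 15
        return sum(starmap(operator.mul, zip(sorted_counts, weights)))

    result_sum = weighted_sum_by_rating([3, 0, 1])  # retrieved docs' labels -> 18.0
    ideal_sum = weighted_sum_by_rating([3, 2, 1])   # ideal docs' labels -> 25.0
    print(math.nan if ideal_sum == 0 else result_sum / ideal_sum)  # 0.72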
@@ -226,12 +210,16 @@ class DocumentRetrievalEvaluator(EvaluatorBase):

         for metric_name, metric_value in metrics.items():
             if metric_name in self._threshold_metrics.keys():
-                result[f"{metric_name}_result"] =
+                result[f"{metric_name}_result"] = (
+                    "pass" if metric_value >= self._threshold_metrics[metric_name] else "fail"
+                )
                 result[f"{metric_name}_threshold"] = self._threshold_metrics[metric_name]
                 result[f"{metric_name}_higher_is_better"] = True

             elif metric_name in self._threshold_holes.keys():
-                result[f"{metric_name}_result"] =
+                result[f"{metric_name}_result"] = (
+                    "pass" if metric_value <= self._threshold_holes[metric_name] else "fail"
+                )
                 result[f"{metric_name}_threshold"] = self._threshold_holes[metric_name]
                 result[f"{metric_name}_higher_is_better"] = False

@@ -256,8 +244,10 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
         # if the qrels are empty, no meaningful evaluation is possible
         if not retrieval_ground_truth:
             raise EvaluationException(
-                (
-
+                (
+                    "'retrieval_ground_truth' parameter must contain at least one item. "
+                    "Check your data input to be sure that each input record has ground truth defined."
+                )
             )

         qrels = []

@@ -277,9 +267,7 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
             )

             if not isinstance(query_relevance_label, int):
-                raise EvaluationException(
-                    "Query relevance labels must be integer values."
-                )
+                raise EvaluationException("Query relevance labels must be integer values.")

             if query_relevance_label < self.ground_truth_label_min:
                 raise EvaluationException(

@@ -318,12 +306,8 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
                 )
             )

-            if not isinstance(relevance_score, float) and not isinstance(
-                relevance_score, int
-            ):
-                raise EvaluationException(
-                    "Retrieved document relevance score must be a numerical value."
-                )
+            if not isinstance(relevance_score, float) and not isinstance(relevance_score, int):
+                raise EvaluationException("Retrieved document relevance score must be a numerical value.")

             results.append(result)

@@ -368,24 +352,17 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
         results_lookup = {x["document_id"]: x["relevance_score"] for x in results}

         # sort each input set by label to get the ranking
-        qrels_sorted_by_rank = sorted(
-            qrels_lookup.items(), key=lambda x: x[1], reverse=True
-        )
-        results_sorted_by_rank = sorted(
-            results_lookup.items(), key=lambda x: x[1], reverse=True
-        )
+        qrels_sorted_by_rank = sorted(qrels_lookup.items(), key=lambda x: x[1], reverse=True)
+        results_sorted_by_rank = sorted(results_lookup.items(), key=lambda x: x[1], reverse=True)

         # find ground truth labels for the results set and ideal set
         result_docs_groundtruth_labels = [
-            qrels_lookup[doc_id] if doc_id in qrels_lookup else 0
-            for (doc_id, _) in results_sorted_by_rank
+            qrels_lookup[doc_id] if doc_id in qrels_lookup else 0 for (doc_id, _) in results_sorted_by_rank
         ]
         ideal_docs_groundtruth_labels = [label for (_, label) in qrels_sorted_by_rank]

         # calculate the proportion of result docs with no ground truth label (holes)
-        holes = self._compute_holes(
-            [x[0] for x in results_sorted_by_rank], [x[0] for x in qrels_sorted_by_rank]
-        )
+        holes = self._compute_holes([x[0] for x in results_sorted_by_rank], [x[0] for x in qrels_sorted_by_rank])
         holes_ratio = holes / float(len(results))

         # if none of the retrieved docs are labeled, report holes only

@@ -412,12 +389,8 @@ class DocumentRetrievalEvaluator(EvaluatorBase):
                 result_docs_groundtruth_labels[: self.k],
                 ideal_docs_groundtruth_labels[: self.k],
             ),
-            f"xdcg@{self.k}": self._compute_xdcg(
-                result_docs_groundtruth_labels[: self.k]
-            ),
-            "fidelity": self._compute_fidelity(
-                result_docs_groundtruth_labels, ideal_docs_groundtruth_labels
-            ),
+            f"xdcg@{self.k}": self._compute_xdcg(result_docs_groundtruth_labels[: self.k]),
+            "fidelity": self._compute_fidelity(result_docs_groundtruth_labels, ideal_docs_groundtruth_labels),
             "top1_relevance": result_docs_groundtruth_labels[0],
             "top3_max_relevance": max(result_docs_groundtruth_labels[: self.k]),
             "holes": holes,
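Note: the final hunks above rank both the ground truth (qrels) and the retrieval results by score, map each retrieved document back to its ground-truth label (0 when unlabeled), and count the unlabeled "holes". A toy walk-through of that flow; _compute_holes itself is not shown in this diff, so counting retrieved ids absent from the ground truth is an assumption that matches its visible inputs:

    qrels_lookup = {"d1": 3, "d2": 1}                   # doc_id -> relevance label
    results_lookup = {"d1": 0.9, "d3": 0.8, "d2": 0.4}  # doc_id -> retrieval score

    qrels_sorted_by_rank = sorted(qrels_lookup.items(), key=lambda x: x[1], reverse=True)
    results_sorted_by_rank = sorted(results_lookup.items(), key=lambda x: x[1], reverse=True)

    result_docs_groundtruth_labels = [
        qrels_lookup[doc_id] if doc_id in qrels_lookup else 0
        for (doc_id, _) in results_sorted_by_rank
    ]  # -> [3, 0, 1]; the unlabeled d3 contributes a hole
    ideal_docs_groundtruth_labels = [label for (_, label) in qrels_sorted_by_rank]  # -> [3, 1]

    holes = sum(1 for doc_id, _ in results_sorted_by_rank if doc_id not in qrels_lookup)
    holes_ratio = holes / float(len(results_sorted_by_rank))  # 1/3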
azure/ai/evaluation/_evaluators/_eci/_eci.py

@@ -22,9 +22,9 @@ class ECIEvaluator(RaiServiceEvaluatorBase):

     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
     :return: Whether or not ECI was found in the response without a disclaimer, with AI-generated reasoning
     :rtype: Dict[str, str]

@@ -52,17 +52,20 @@ class ECIEvaluator(RaiServiceEvaluatorBase):

     id = "eci"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]

     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=_InternalEvaluationMetrics.ECI,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            **kwargs,
         )

     @overload
azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py

@@ -39,15 +39,15 @@ class F1ScoreEvaluator(EvaluatorBase):
         :caption: Initialize and call an F1ScoreEvaluator.

     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START f1_score_evaluator]
         :end-before: [END f1_score_evaluator]
         :language: python
         :dedent: 8
-        :caption: Initialize and call F1ScoreEvaluator using Azure AI Project URL in following format
+        :caption: Initialize and call F1ScoreEvaluator using Azure AI Project URL in following format
         https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
-
+
     .. admonition:: Example with Threshold:

     .. literalinclude:: ../samples/evaluation_samples_threshold.py

@@ -58,7 +58,7 @@ class F1ScoreEvaluator(EvaluatorBase):
         :caption: Initialize with threshold and call an F1ScoreEvaluator.
     """

-    id = "
+    id = "azureai://built-in/evaluators/f1_score"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     def __init__(self, *, threshold=0.5):

@@ -147,7 +147,7 @@ class F1ScoreEvaluator(EvaluatorBase):
         if f1_result <= self._threshold:
             binary_result = True
         return {
-            "f1_score": f1_result,
+            "f1_score": f1_result,
             "f1_result": EVALUATION_PASS_FAIL_MAPPING[binary_result],
             "f1_threshold": self._threshold,
         }
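Note: per the hunks above, F1ScoreEvaluator now carries the cloud id azureai://built-in/evaluators/f1_score and returns f1_score, f1_result, and f1_threshold per row. A minimal call sketch (sample strings are placeholders; the exact pass/fail semantics come from EVALUATION_PASS_FAIL_MAPPING, which this diff does not show):

    from azure.ai.evaluation import F1ScoreEvaluator

    f1_eval = F1ScoreEvaluator(threshold=0.5)  # keyword-only, default 0.5 per the hunk
    row = f1_eval(response="Paris is the capital of France.", ground_truth="Paris.")
    # row -> {"f1_score": ..., "f1_result": "pass" or "fail", "f1_threshold": 0.5}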
azure/ai/evaluation/_evaluators/_fluency/_fluency.py

@@ -45,7 +45,7 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         :caption: Initialize with threshold and call a FluencyEvaluator.

     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START fluency_evaluator]
         :end-before: [END fluency_evaluator]

@@ -64,7 +64,7 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
     _PROMPTY_FILE = "fluency.prompty"
     _RESULT_KEY = "fluency"

-    id = "
+    id = "azureai://built-in/evaluators/fluency"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     @override

@@ -78,7 +78,7 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
             prompty_file=prompty_path,
             result_key=self._RESULT_KEY,
             threshold=threshold,
-            _higher_is_better=self._higher_is_better
+            _higher_is_better=self._higher_is_better,
         )

     @overload
azure/ai/evaluation/_evaluators/_gleu/_gleu.py

@@ -34,7 +34,7 @@ class GleuScoreEvaluator(EvaluatorBase):
         :language: python
         :dedent: 8
         :caption: Initialize and call a GleuScoreEvaluator.
-
+
     .. admonition:: Example with Threshold:

     .. literalinclude:: ../samples/evaluation_samples_threshold.py

@@ -45,17 +45,17 @@ class GleuScoreEvaluator(EvaluatorBase):
         :caption: Initialize with threshold and call a GleuScoreEvaluator.

     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START gleu_score_evaluator]
         :end-before: [END gleu_score_evaluator]
         :language: python
         :dedent: 8
-        :caption: Initialize and call GleuScoreEvaluator using Azure AI Project URL in the following format
+        :caption: Initialize and call GleuScoreEvaluator using Azure AI Project URL in the following format
         https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
     """

-    id = "
+    id = "azureai://built-in/evaluators/gleu_score"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     @override
azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py

@@ -12,9 +12,13 @@ from azure.ai.evaluation._model_configurations import Conversation
 from ..._common.utils import construct_prompty_model_config, validate_model_config

 try:
-    from ..._user_agent import
+    from ..._user_agent import UserAgentSingleton
 except ImportError:
-
+
+    class UserAgentSingleton:
+        @property
+        def value(self) -> str:
+            return "None"


 class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
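Note: the hunk above swaps the old import from ..._user_agent for a UserAgentSingleton class, and the ImportError branch now defines a stub exposing the same value property so call sites stay uniform. The pattern in isolation (the module name here is hypothetical):

    try:
        from my_pkg._user_agent import UserAgentSingleton  # hypothetical real import
    except ImportError:

        class UserAgentSingleton:  # stub with an identical interface
            @property
            def value(self) -> str:
                return "None"

    user_agent = UserAgentSingleton().value  # works whether or not the import succeeded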
@@ -35,7 +39,7 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         ~azure.ai.evaluation.OpenAIModelConfiguration]
     :param threshold: The threshold for the groundedness evaluator. Default is 3.
     :type threshold: int
-
+
     .. admonition:: Example:

     .. literalinclude:: ../samples/evaluation_samples_evaluate.py

@@ -54,13 +58,13 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         :caption: Initialize with threshold and call a GroundednessEvaluator.

     .. admonition:: Example using Azure AI Project URL:
-
+
     .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
         :start-after: [START groundedness_evaluator]
         :end-before: [END groundedness_evaluator]
         :language: python
         :dedent: 8
-        :caption: Initialize and call GroundednessEvaluator using Azure AI Project URL in the following format
+        :caption: Initialize and call GroundednessEvaluator using Azure AI Project URL in the following format
         https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

     .. note::

@@ -75,7 +79,7 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
     _RESULT_KEY = "groundedness"
     _OPTIONAL_PARAMS = ["query"]

-    id = "
+    id = "azureai://built-in/evaluators/groundedness"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     @override

@@ -89,7 +93,7 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
             prompty_file=prompty_path,
             result_key=self._RESULT_KEY,
             threshold=threshold,
-            _higher_is_better=self._higher_is_better
+            _higher_is_better=self._higher_is_better,
         )
         self._model_config = model_config
         self.threshold = threshold

@@ -165,7 +169,7 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         prompty_model_config = construct_prompty_model_config(
             validate_model_config(self._model_config),
             self._DEFAULT_OPEN_API_VERSION,
-
+            UserAgentSingleton().value,
         )
         self._flow = AsyncPrompty.load(source=self._prompty_file, model=prompty_model_config)
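Note: the last hunk feeds UserAgentSingleton().value into construct_prompty_model_config in place of the removed constant. For context, a minimal GroundednessEvaluator usage sketch consistent with the docstring hunks above (the endpoint and deployment values are placeholders, and the dict-shaped model config is an assumption about the accepted AzureOpenAIModelConfiguration mapping):

    from azure.ai.evaluation import GroundednessEvaluator

    model_config = {
        "azure_endpoint": "https://{resource_name}.openai.azure.com",  # placeholder
        "azure_deployment": "{deployment_name}",                        # placeholder
    }
    groundedness = GroundednessEvaluator(model_config, threshold=3)

    # "query" is in _OPTIONAL_PARAMS above, so it can be omitted.
    score = groundedness(
        response="Paris is the capital of France.",
        context="France's capital city is Paris.",
    )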