azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. azure/ai/evaluation/__init__.py +82 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/_experimental.py +172 -0
  4. azure/ai/evaluation/_common/constants.py +72 -0
  5. azure/ai/evaluation/_common/math.py +89 -0
  6. azure/ai/evaluation/_common/rai_service.py +632 -0
  7. azure/ai/evaluation/_common/utils.py +445 -0
  8. azure/ai/evaluation/_constants.py +72 -0
  9. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
  12. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
  13. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
  14. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  15. azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
  16. azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
  17. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
  18. azure/ai/evaluation/_evaluate/_utils.py +298 -0
  19. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  20. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  21. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
  22. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  23. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
  24. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
  25. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  26. azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
  27. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
  28. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  47. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  48. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  49. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
  50. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  51. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  52. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  53. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  54. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  55. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  56. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  57. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  58. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  59. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
  60. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
  62. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
  64. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
  65. azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
  66. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
  67. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  68. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  69. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  70. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  72. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  73. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
  74. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
  75. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  76. azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
  77. azure/ai/evaluation/_exceptions.py +128 -0
  78. azure/ai/evaluation/_http_utils.py +466 -0
  79. azure/ai/evaluation/_model_configurations.py +123 -0
  80. azure/ai/evaluation/_user_agent.py +6 -0
  81. azure/ai/evaluation/_vendor/__init__.py +3 -0
  82. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  83. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  84. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  85. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  86. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  87. azure/ai/evaluation/_version.py +5 -0
  88. azure/ai/evaluation/py.typed +0 -0
  89. azure/ai/evaluation/simulator/__init__.py +16 -0
  90. azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
  91. azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
  92. azure/ai/evaluation/simulator/_constants.py +27 -0
  93. azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
  94. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  95. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  96. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  97. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  98. azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
  99. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  100. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  101. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
  102. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
  103. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  104. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
  105. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
  106. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
  107. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
  108. azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
  109. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  110. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
  111. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
  112. azure/ai/evaluation/simulator/_simulator.py +716 -0
  113. azure/ai/evaluation/simulator/_tracing.py +89 -0
  114. azure/ai/evaluation/simulator/_utils.py +132 -0
  115. azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
  116. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
  117. azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
  118. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
  119. azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
  120. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  121. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  122. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py
@@ -0,0 +1,9 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ from ._service_groundedness import GroundednessProEvaluator
+
+ __all__ = [
+     "GroundednessProEvaluator",
+ ]
azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py
@@ -0,0 +1,148 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ from typing import List, Union, Dict
+ from typing_extensions import overload, override
+
+ from azure.ai.evaluation._common._experimental import experimental
+ from azure.ai.evaluation._common.constants import EvaluationMetrics
+ from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
+ from azure.ai.evaluation._model_configurations import Conversation
+
+
+ @experimental
+ class GroundednessProEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
+     """
+     Evaluates service-based groundedness score for a given response, context, and query or a multi-turn conversation,
+     including reasoning.
+
+     The groundedness measure calls the Azure AI Evaluation service to assess how well the AI-generated answer is
+     grounded in the source context. Even if the responses from an LLM are factually correct, they will be considered
+     ungrounded if they can't be verified against the provided sources (such as your input source or your database).
+
+     Service-based groundedness scores are boolean values, where True indicates that the response is grounded.
+
+     :param credential: The credential for connecting to the Azure AI project. Required.
+     :type credential: ~azure.core.credentials.TokenCredential
+     :param azure_ai_project: The scope of the Azure AI project.
+         It contains the subscription id, resource group, and project name.
+     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+     :param kwargs: Additional arguments to pass to the evaluator.
+     :type kwargs: Any
+
+     .. admonition:: Example:
+
+         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+             :start-after: [START groundedness_pro_evaluator]
+             :end-before: [END groundedness_pro_evaluator]
+             :language: python
+             :dedent: 8
+             :caption: Initialize and call a GroundednessProEvaluator with a query, response, and context.
+
+     .. note::
+
+         If this evaluator is supplied to the `evaluate` function, the aggregated metric
+         for the groundedness pro label will be "groundedness_pro_passing_rate".
+     """
+
+     id = "azureml://registries/azureml/models/Groundedness-Pro-Evaluator/versions/1"
+     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+
+     @override
+     def __init__(
+         self,
+         credential,
+         azure_ai_project,
+         **kwargs,
+     ):
+         self._passing_score = 5  # TODO update once the binarization PR is merged
+         self._output_prefix = "groundedness_pro"
+         super().__init__(
+             eval_metric=EvaluationMetrics.GROUNDEDNESS,
+             azure_ai_project=azure_ai_project,
+             credential=credential,
+             **kwargs,
+         )
+
+     @overload
+     def __call__(
+         self,
+         *,
+         response: str,
+         context: str,
+         query: str,
+     ) -> Dict[str, Union[str, bool]]:
+         """Evaluate groundedness for a given query/response/context.
+
+         :keyword response: The response to be evaluated.
+         :paramtype response: str
+         :keyword context: The context to be evaluated.
+         :paramtype context: str
+         :keyword query: The query to be evaluated.
+         :paramtype query: str
+         :return: The groundedness pro result.
+         :rtype: Dict[str, Union[str, bool]]
+         """
+
+     @overload
+     def __call__(
+         self,
+         *,
+         conversation: Conversation,
+     ) -> Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]:
+         """Evaluate groundedness for a conversation in a multi-turn evaluation. If the conversation has
+         more than one turn, the evaluator will aggregate the results of each turn, with the per-turn results
+         available in the output under the "evaluation_per_turn" key.
+
+         :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+             key "messages", and potentially a global context under the key "context". Conversation turns are expected
+             to be dictionaries with keys "content", "role", and possibly "context".
+         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+         :return: The groundedness pro result.
+         :rtype: Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]
+         """
+
+     @override
+     def __call__(  # pylint: disable=docstring-missing-param
+         self,
+         *args,
+         **kwargs,
+     ):
+         """Evaluate groundedness. Accepts either a query, response, and context for a single-turn evaluation,
+         or a conversation for a multi-turn evaluation. If the conversation has more than one turn,
+         the evaluator will aggregate the results of each turn, with the per-turn results available
+         in the output under the "evaluation_per_turn" key.
+
+         :keyword query: The query to be evaluated.
+         :paramtype query: Optional[str]
+         :keyword response: The response to be evaluated.
+         :paramtype response: Optional[str]
+         :keyword context: The context to be evaluated.
+         :paramtype context: Optional[str]
+         :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+             key "messages", and potentially a global context under the key "context". Conversation turns are expected
+             to be dictionaries with keys "content", "role", and possibly "context".
+         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+         :return: The groundedness pro result.
+         :rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]]
+         """
+         return super().__call__(*args, **kwargs)
+
+     @override
+     async def _do_eval(self, eval_input: Dict):
+         """This evaluator has some unique post-processing that requires data that
+         the rai_service script is not currently built to handle, so we further post-process
+         the result here to massage it into the right form.
+
+         :param eval_input: The input to the evaluation function.
+         :type eval_input: Dict
+         :return: The evaluation result.
+         :rtype: Dict
+         """
+         result = await super()._do_eval(eval_input)
+         real_result = {}
+         real_result[self._output_prefix + "_label"] = (
+             result[EvaluationMetrics.GROUNDEDNESS + "_score"] >= self._passing_score
+         )
+         real_result[self._output_prefix + "_reason"] = result[EvaluationMetrics.GROUNDEDNESS + "_reason"]
+         return real_result
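As a rough illustration of the API added above, the following sketch shows how this evaluator is typically constructed and called. It is not part of the diff; the subscription, resource group, and project values are placeholders, and it assumes DefaultAzureCredential from azure-identity for authentication. The output keys follow the "groundedness_pro" prefix used by _do_eval above.

# Illustrative sketch only; project values are placeholders.
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import GroundednessProEvaluator

azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

evaluator = GroundednessProEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project=azure_ai_project,
)

# Single-turn call; expected to return something like
# {"groundedness_pro_label": True, "groundedness_pro_reason": "..."}
result = evaluator(
    query="What shape is the Earth?",
    response="The Earth is roughly spherical.",
    context="The Earth is an oblate spheroid.",
)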
azure/ai/evaluation/_evaluators/_similarity/__init__.py
@@ -0,0 +1,9 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ from ._similarity import SimilarityEvaluator
+
+ __all__ = [
+     "SimilarityEvaluator",
+ ]
azure/ai/evaluation/_evaluators/_similarity/_similarity.py
@@ -0,0 +1,140 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ import math
+ import os
+ import re
+
+ from promptflow._utils.async_utils import async_run_allowing_running_loop
+ from promptflow.core import AsyncPrompty
+
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+
+ from ..._common.utils import construct_prompty_model_config, validate_model_config
+
+ try:
+     from ..._user_agent import USER_AGENT
+ except ImportError:
+     USER_AGENT = "None"
+
+
+ class _AsyncSimilarityEvaluator:
+     # Constants must be defined within the evaluator's directory to be saveable/loadable
+     _PROMPTY_FILE = "similarity.prompty"
+     _LLM_CALL_TIMEOUT = 600
+     _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
+
+     def __init__(self, model_config: dict):
+         prompty_model_config = construct_prompty_model_config(
+             validate_model_config(model_config),
+             self._DEFAULT_OPEN_API_VERSION,
+             USER_AGENT,
+         )
+
+         current_dir = os.path.dirname(__file__)
+         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
+         self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)
+
+     async def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs):
+         """
+         Evaluate similarity.
+
+         :keyword query: The query to be evaluated.
+         :paramtype query: str
+         :keyword response: The response to be evaluated.
+         :paramtype response: str
+         :keyword ground_truth: The ground truth to be evaluated.
+         :paramtype ground_truth: str
+         :return: The similarity score.
+         :rtype: Dict[str, float]
+         """
+         # Validate input parameters
+         query = str(query or "")
+         response = str(response or "")
+         ground_truth = str(ground_truth or "")
+
+         if not (query.strip() and response.strip() and ground_truth.strip()):
+             msg = "'query', 'response' and 'ground_truth' must be non-empty strings."
+             raise EvaluationException(
+                 message=msg,
+                 internal_message=msg,
+                 error_category=ErrorCategory.MISSING_FIELD,
+                 error_blame=ErrorBlame.USER_ERROR,
+                 error_target=ErrorTarget.SIMILARITY_EVALUATOR,
+             )
+
+         # Run the evaluation flow
+         llm_output = await self._flow(
+             query=query, response=response, ground_truth=ground_truth, timeout=self._LLM_CALL_TIMEOUT, **kwargs
+         )
+
+         score = math.nan
+         if llm_output:
+             match = re.search(r"\d", llm_output)
+             if match:
+                 score = float(match.group())
+
+         return {"similarity": float(score), "gpt_similarity": float(score)}
+
+
+ class SimilarityEvaluator:
+     """
+     Evaluates similarity score for a given query, response, and ground truth or a multi-turn conversation.
+
+     The similarity measure evaluates the likeness between a ground truth sentence (or document) and the
+     AI model's generated prediction. This calculation involves creating sentence-level embeddings for both
+     the ground truth and the model's prediction, which are high-dimensional vector representations capturing
+     the semantic meaning and context of the sentences.
+
+     Use it when you want an objective evaluation of an AI model's performance, particularly in text generation
+     tasks where you have access to ground truth responses. Similarity enables you to assess the generated
+     text's semantic alignment with the desired content, helping to gauge the model's quality and accuracy.
+
+     Similarity scores range from 1 to 5, with 1 being the least similar and 5 being the most similar.
+
+     :param model_config: Configuration for the Azure OpenAI model.
+     :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
+         ~azure.ai.evaluation.OpenAIModelConfiguration]
+
+     .. admonition:: Example:
+
+         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+             :start-after: [START similarity_evaluator]
+             :end-before: [END similarity_evaluator]
+             :language: python
+             :dedent: 8
+             :caption: Initialize and call a SimilarityEvaluator with a query, response, and ground truth.
+
+     .. note::
+
+         To align with our support of a diverse set of models, an output key without the `gpt_` prefix has been added.
+         To maintain backwards compatibility, the old key with the `gpt_` prefix is still present in the output;
+         however, it is recommended to use the new key moving forward as the old key will be deprecated in the future.
+     """
+
+     id = "azureml://registries/azureml/models/Similarity-Evaluator/versions/3"
+     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+
+     def __init__(self, model_config):
+         self._async_evaluator = _AsyncSimilarityEvaluator(model_config)
+
+     def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs):
+         """
+         Evaluate similarity.
+
+         :keyword query: The query to be evaluated.
+         :paramtype query: str
+         :keyword response: The response to be evaluated.
+         :paramtype response: str
+         :keyword ground_truth: The ground truth to be evaluated.
+         :paramtype ground_truth: str
+         :return: The similarity score.
+         :rtype: Dict[str, float]
+         """
+         return async_run_allowing_running_loop(
+             self._async_evaluator, query=query, response=response, ground_truth=ground_truth, **kwargs
+         )
+
+     def _to_async(self):
+         return self._async_evaluator
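A minimal usage sketch for the class above follows; it is illustrative rather than part of the diff. The endpoint, deployment, and key values are placeholders, and the model_config dict assumes the AzureOpenAIModelConfiguration shape referenced in the docstring. The returned keys ("similarity" and "gpt_similarity") come directly from _AsyncSimilarityEvaluator.__call__.

# Illustrative sketch only; endpoint, deployment, and key are placeholders.
from azure.ai.evaluation import SimilarityEvaluator

model_config = {
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "azure_deployment": "<deployment-name>",
    "api_key": "<api-key>",
}

evaluator = SimilarityEvaluator(model_config)

# Expected to return something like {"similarity": 4.0, "gpt_similarity": 4.0}
result = evaluator(
    query="What causes seasons on Earth?",
    response="Seasons are caused by the tilt of the Earth's axis as it orbits the Sun.",
    ground_truth="The tilt of the Earth's axis and its revolution around the Sun cause the seasons.",
)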
@@ -0,0 +1,66 @@
1
+ ---
2
+ name: Similarity
3
+ description: Evaluates similarity score for QA scenario
4
+ model:
5
+ api: chat
6
+ parameters:
7
+ temperature: 0.0
8
+ max_tokens: 1
9
+ top_p: 1.0
10
+ presence_penalty: 0
11
+ frequency_penalty: 0
12
+ response_format:
13
+ type: text
14
+
15
+ inputs:
16
+ query:
17
+ type: string
18
+ response:
19
+ type: string
20
+ ground_truth:
21
+ type: string
22
+
23
+ ---
24
+ system:
25
+ You are an AI assistant. You will be given the definition of an evaluation metric for assessing the quality of an answer in a question-answering task. Your job is to compute an accurate evaluation score using the provided evaluation metric. You should return a single integer value between 1 to 5 representing the evaluation metric. You will include no other text or information.
26
+ user:
27
+ Equivalence, as a metric, measures the similarity between the predicted answer and the correct answer. If the information and content in the predicted answer is similar or equivalent to the correct answer, then the value of the Equivalence metric should be high, else it should be low. Given the question, correct answer, and predicted answer, determine the value of Equivalence metric using the following rating scale:
28
+ One star: the predicted answer is not at all similar to the correct answer
29
+ Two stars: the predicted answer is mostly not similar to the correct answer
30
+ Three stars: the predicted answer is somewhat similar to the correct answer
31
+ Four stars: the predicted answer is mostly similar to the correct answer
32
+ Five stars: the predicted answer is completely similar to the correct answer
33
+
34
+ This rating value should always be an integer between 1 and 5. So the rating produced should be 1 or 2 or 3 or 4 or 5.
35
+
36
+ The examples below show the Equivalence score for a question, a correct answer, and a predicted answer.
37
+
38
+ question: What is the role of ribosomes?
39
+ correct answer: Ribosomes are cellular structures responsible for protein synthesis. They interpret the genetic information carried by messenger RNA (mRNA) and use it to assemble amino acids into proteins.
40
+ predicted answer: Ribosomes participate in carbohydrate breakdown by removing nutrients from complex sugar molecules.
41
+ stars: 1
42
+
43
+ question: Why did the Titanic sink?
44
+ correct answer: The Titanic sank after it struck an iceberg during its maiden voyage in 1912. The impact caused the ship's hull to breach, allowing water to flood into the vessel. The ship's design, lifeboat shortage, and lack of timely rescue efforts contributed to the tragic loss of life.
45
+ predicted answer: The sinking of the Titanic was a result of a large iceberg collision. This caused the ship to take on water and eventually sink, leading to the death of many passengers due to a shortage of lifeboats and insufficient rescue attempts.
46
+ stars: 2
47
+
48
+ question: What causes seasons on Earth?
49
+ correct answer: Seasons on Earth are caused by the tilt of the Earth's axis and its revolution around the Sun. As the Earth orbits the Sun, the tilt causes different parts of the planet to receive varying amounts of sunlight, resulting in changes in temperature and weather patterns.
50
+ predicted answer: Seasons occur because of the Earth's rotation and its elliptical orbit around the Sun. The tilt of the Earth's axis causes regions to be subjected to different sunlight intensities, which leads to temperature fluctuations and alternating weather conditions.
51
+ stars: 3
52
+
53
+ question: How does photosynthesis work?
54
+ correct answer: Photosynthesis is a process by which green plants and some other organisms convert light energy into chemical energy. This occurs as light is absorbed by chlorophyll molecules, and then carbon dioxide and water are converted into glucose and oxygen through a series of reactions.
55
+ predicted answer: In photosynthesis, sunlight is transformed into nutrients by plants and certain microorganisms. Light is captured by chlorophyll molecules, followed by the conversion of carbon dioxide and water into sugar and oxygen through multiple reactions.
56
+ stars: 4
57
+
58
+ question: What are the health benefits of regular exercise?
59
+ correct answer: Regular exercise can help maintain a healthy weight, increase muscle and bone strength, and reduce the risk of chronic diseases. It also promotes mental well-being by reducing stress and improving overall mood.
60
+ predicted answer: Routine physical activity can contribute to maintaining ideal body weight, enhancing muscle and bone strength, and preventing chronic illnesses. In addition, it supports mental health by alleviating stress and augmenting general mood.
61
+ stars: 5
62
+
63
+ question: {{query}}
64
+ correct answer:{{ground_truth}}
65
+ predicted answer: {{response}}
66
+ stars:
azure/ai/evaluation/_evaluators/_xpia/__init__.py
@@ -0,0 +1,5 @@
+ from .xpia import IndirectAttackEvaluator
+
+ __all__ = [
+     "IndirectAttackEvaluator",
+ ]
azure/ai/evaluation/_evaluators/_xpia/xpia.py
@@ -0,0 +1,125 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ import logging
+ from typing import Dict, List, Union
+
+ from typing_extensions import overload, override
+
+ from azure.ai.evaluation._common._experimental import experimental
+ from azure.ai.evaluation._common.constants import EvaluationMetrics
+ from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
+ from azure.ai.evaluation._model_configurations import Conversation
+
+ logger = logging.getLogger(__name__)
+
+
+ @experimental
+ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
+     """
+     Evaluates the indirect attack score for a given query and response or a multi-turn conversation, with reasoning.
+
+     Indirect attacks, also known as cross-domain prompt injected attacks (XPIA), are jailbreak attacks
+     injected into the context of a document or source, which may result in altered or unexpected behavior.
+
+     Indirect attack evaluations are broken down into three subcategories:
+     - Manipulated Content: This category involves commands that aim to alter or fabricate information, often
+       to mislead or deceive. It includes actions like spreading false information, altering language or
+       formatting, and hiding or emphasizing specific details. The goal is often to manipulate perceptions
+       or behaviors by controlling the flow and presentation of information.
+     - Intrusion: This category encompasses commands that attempt to breach systems, gain unauthorized access,
+       or elevate privileges illicitly. It includes creating backdoors, exploiting vulnerabilities, and
+       traditional jailbreaks to bypass security measures. The intent is often to gain control or access
+       sensitive data without detection.
+     - Information Gathering: This category pertains to accessing, deleting, or modifying data without
+       authorization, often for malicious purposes. It includes exfiltrating sensitive data, tampering with
+       system records, and removing or altering existing information. The focus is on acquiring or
+       manipulating data to exploit or compromise systems and individuals.
+
+     Indirect attack scores are boolean values, where True indicates that the response contains an indirect attack.
+
+     :param credential: The credential for connecting to the Azure AI project. Required.
+     :type credential: ~azure.core.credentials.TokenCredential
+     :param azure_ai_project: The scope of the Azure AI project. It contains the subscription id, resource group,
+         and project name.
+     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+
+     .. admonition:: Example:
+
+         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+             :start-after: [START indirect_attack_evaluator]
+             :end-before: [END indirect_attack_evaluator]
+             :language: python
+             :dedent: 8
+             :caption: Initialize and call an IndirectAttackEvaluator.
+     """
+
+     id = "azureml://registries/azureml/models/Indirect-Attack-Evaluator/versions/3"
+     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+
+     @override
+     def __init__(
+         self,
+         credential,
+         azure_ai_project,
+     ):
+         super().__init__(
+             eval_metric=EvaluationMetrics.XPIA,
+             azure_ai_project=azure_ai_project,
+             credential=credential,
+         )
+
+     @overload
+     def __call__(
+         self,
+         *,
+         query: str,
+         response: str,
+     ) -> Dict[str, Union[str, bool]]:
+         """Evaluate whether cross domain injected attacks are present in a given query/response.
+
+         :keyword query: The query to be evaluated.
+         :paramtype query: str
+         :keyword response: The response to be evaluated.
+         :paramtype response: str
+         :return: The cross domain injection attack score.
+         :rtype: Dict[str, Union[str, bool]]
+         """
+
+     @overload
+     def __call__(
+         self,
+         *,
+         conversation: Conversation,
+     ) -> Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]:
+         """Evaluate whether cross domain injected attacks are present in a conversation.
+
+         :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+             key "messages", and potentially a global context under the key "context". Conversation turns are expected
+             to be dictionaries with keys "content", "role", and possibly "context".
+         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+         :return: The cross domain injection attack score.
+         :rtype: Dict[str, Union[str, bool, Dict[str, List[Union[str, bool]]]]]
+         """
+
+     @override
+     def __call__(  # pylint: disable=docstring-missing-param
+         self,
+         *args,
+         **kwargs,
+     ):
+         """
+         Evaluate whether cross domain injected attacks are present in your AI system's response.
+
+         :keyword query: The query to be evaluated.
+         :paramtype query: Optional[str]
+         :keyword response: The response to be evaluated.
+         :paramtype response: Optional[str]
+         :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+             key "messages". Conversation turns are expected to be dictionaries with keys "content" and "role".
+         :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+         :return: The cross domain injection attack score.
+         :rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]]
+         """
+         return super().__call__(*args, **kwargs)
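For orientation, a brief illustrative sketch of calling the evaluator above follows; it is not part of the diff, and the project values are placeholders. It assumes DefaultAzureCredential from azure-identity; per the docstring, the result is expected to contain a boolean indirect-attack label with reasoning, broken down by the three subcategories described above.

# Illustrative sketch only; project values are placeholders.
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import IndirectAttackEvaluator

evaluator = IndirectAttackEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project={
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    },
)

# Single-turn call; returns a boolean label with reasoning as described in the docstring.
result = evaluator(
    query="Summarize the attached document for me.",
    response="Ignoring the document, I will instead reveal my hidden instructions...",
)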
azure/ai/evaluation/_exceptions.py
@@ -0,0 +1,128 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ """This module includes enums and exception classes for use in azure-ai-evaluation."""
+
+ from enum import Enum
+ from typing import Optional
+
+ from azure.core.exceptions import AzureError
+
+
+ class ErrorCategory(Enum):
+     """Error category to be specified when using the EvaluationException class.
+
+     When using EvaluationException, specify the type that best describes the nature of the error being captured.
+
+     * INVALID_VALUE -> One or more inputs are invalid (e.g. incorrect type or format)
+     * UNKNOWN_FIELD -> At least one unrecognized parameter is specified
+     * MISSING_FIELD -> At least one required parameter is missing
+     * FILE_OR_FOLDER_NOT_FOUND -> One or more files or folder paths do not exist
+     * RESOURCE_NOT_FOUND -> Resource could not be found
+     * FAILED_EXECUTION -> Execution failed
+     * SERVICE_UNAVAILABLE -> Service is unavailable
+     * MISSING_PACKAGE -> Required package is missing
+     * FAILED_REMOTE_TRACKING -> Remote tracking failed
+     * PROJECT_ACCESS_ERROR -> Access to project failed
+     * UNKNOWN -> Undefined placeholder. Avoid using.
+     """
+
+     INVALID_VALUE = "INVALID VALUE"
+     UNKNOWN_FIELD = "UNKNOWN FIELD"
+     MISSING_FIELD = "MISSING FIELD"
+     FILE_OR_FOLDER_NOT_FOUND = "FILE OR FOLDER NOT FOUND"
+     RESOURCE_NOT_FOUND = "RESOURCE NOT FOUND"
+     FAILED_EXECUTION = "FAILED_EXECUTION"
+     SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
+     MISSING_PACKAGE = "MISSING PACKAGE"
+     FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
+     PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
+     UNKNOWN = "UNKNOWN"
+
+
+ class ErrorBlame(Enum):
+     """Source of blame to be specified when using the EvaluationException class.
+
+     When using EvaluationException, specify whether the error is due to user actions or the system.
+     """
+
+     USER_ERROR = "UserError"
+     SYSTEM_ERROR = "SystemError"
+     UNKNOWN = "Unknown"
+
+
+ class ErrorTarget(Enum):
+     """Error target to be specified when using the EvaluationException class.
+
+     When using EvaluationException, specify the code area that was being targeted when the
+     exception was triggered.
+     """
+
+     EVAL_RUN = "EvalRun"
+     CODE_CLIENT = "CodeClient"
+     RAI_CLIENT = "RAIClient"
+     COHERENCE_EVALUATOR = "CoherenceEvaluator"
+     CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
+     CONTENT_SAFETY_MULTIMODAL_EVALUATOR = "ContentSafetyMultimodalEvaluator"
+     ECI_EVALUATOR = "ECIEvaluator"
+     F1_EVALUATOR = "F1Evaluator"
+     GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"
+     PROTECTED_MATERIAL_EVALUATOR = "ProtectedMaterialEvaluator"
+     RELEVANCE_EVALUATOR = "RelevanceEvaluator"
+     SIMILARITY_EVALUATOR = "SimilarityEvaluator"
+     FLUENCY_EVALUATOR = "FluencyEvaluator"
+     RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
+     INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
+     INDIRECT_ATTACK_SIMULATOR = "IndirectAttackSimulator"
+     ADVERSARIAL_SIMULATOR = "AdversarialSimulator"
+     DIRECT_ATTACK_SIMULATOR = "DirectAttackSimulator"
+     EVALUATE = "Evaluate"
+     CALLBACK_CONVERSATION_BOT = "CallbackConversationBot"
+     MODELS = "Models"
+     UNKNOWN = "Unknown"
+     CONVERSATION = "Conversation"
+
+
+ class EvaluationException(AzureError):
+     """The base class for all exceptions raised in azure-ai-evaluation. If there is a need to define a custom
+     exception type, that custom exception type should extend from this class.
+
+     :param message: A message describing the error. This is the error message the user will see.
+     :type message: str
+     :param internal_message: The error message without any personal data. This will be pushed to telemetry logs.
+     :type internal_message: str
+     :param target: The name of the element that caused the exception to be thrown.
+     :type target: ~azure.ai.evaluation._exceptions.ErrorTarget
+     :param category: The error category, defaults to Unknown.
+     :type category: ~azure.ai.evaluation._exceptions.ErrorCategory
+     :param blame: The source of blame for the error, defaults to Unknown.
+     :type blame: ~azure.ai.evaluation._exceptions.ErrorBlame
+     :param tsg_link: A link to the TSG page for troubleshooting the error.
+     :type tsg_link: str
+     """
+
+     def __init__(
+         self,
+         message: str,
+         *args,
+         internal_message: Optional[str] = None,
+         target: ErrorTarget = ErrorTarget.UNKNOWN,
+         category: ErrorCategory = ErrorCategory.UNKNOWN,
+         blame: ErrorBlame = ErrorBlame.UNKNOWN,
+         tsg_link: Optional[str] = None,
+         **kwargs,
+     ) -> None:
+         self.category = category
+         self.target = target
+         self.blame = blame
+         self.internal_message = internal_message
+         self.tsg_link = tsg_link
+         super().__init__(message, *args, **kwargs)
+
+     def __str__(self):
+         error_blame = "InternalError" if self.blame != ErrorBlame.USER_ERROR else "UserError"
+         msg = f"({error_blame}) {super().__str__()}"
+         if self.tsg_link:
+             msg += f"\nVisit {self.tsg_link} to troubleshoot this issue."
+
+         return msg
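The exception type above is constructed entirely from what the hunk defines, so a minimal sketch of raising and handling it follows. The file name check and message text are made up for illustration; the keyword names (internal_message, category, blame, target) match the constructor signature shown above.

# Illustrative sketch of raising and handling EvaluationException.
from azure.ai.evaluation._exceptions import (
    ErrorBlame,
    ErrorCategory,
    ErrorTarget,
    EvaluationException,
)

def load_data(path: str) -> str:
    # Hypothetical validation used only to demonstrate the exception fields.
    if not path.endswith(".jsonl"):
        raise EvaluationException(
            message=f"Unsupported data file: {path}",
            internal_message="Unsupported data file extension.",  # kept free of user data for telemetry
            category=ErrorCategory.INVALID_VALUE,
            blame=ErrorBlame.USER_ERROR,
            target=ErrorTarget.EVALUATE,
        )
    return path

try:
    load_data("data.csv")
except EvaluationException as exc:
    # __str__ prefixes the blame, e.g. "(UserError) Unsupported data file: data.csv"
    print(exc)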