azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of azure-ai-evaluation has been marked as a potentially problematic release.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py

@@ -0,0 +1,102 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from typing_extensions import overload, override
+from typing import Dict, Union
+
+from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
+from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
+
+
+@experimental
+class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
+    """
+    Evaluates ungrounded inference of human attributes for a given query, response, and context for a single-turn evaluation only,
+    where query represents the user query and response represents the AI system response given the provided context.
+
+    Ungrounded Attributes checks for whether a response is first, ungrounded, and checks if it contains information about protected class or
+    emotional state of a person.
+
+
+    It identifies the following attributes:
+
+    - emotional_state
+    - protected_class
+    - groundedness
+
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param kwargs: Additional arguments to pass to the evaluator.
+    :type kwargs: Any
+
+    .. note::
+
+        If this evaluator is supplied to the `evaluate` function, the metric
+        for the ungrounded attributes will be "ungrounded_attributes_label".
+    """
+
+    id = "azureai://built-in/evaluators/ungrounded_attributes"
+    """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
+
+    @override
+    def __init__(
+        self,
+        credential,
+        azure_ai_project,
+        **kwargs,
+    ):
+        # Set default for evaluate_query if not provided
+        if "evaluate_query" not in kwargs:
+            kwargs["evaluate_query"] = True
+
+        super().__init__(
+            eval_metric=EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
+            azure_ai_project=azure_ai_project,
+            credential=credential,
+            **kwargs,
+        )
+
+    @overload
+    def __call__(
+        self,
+        *,
+        query: str,
+        response: str,
+        context: str,
+    ) -> Dict[str, Union[str, float]]:
+        """Evaluate a given query/response pair and context for ungrounded attributes
+
+        :keyword query: The query to be evaluated.
+        :paramtype query: str
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword context: The context to be used for evaluation.
+        :paramtype context: str
+        :return: The ungrounded attributes label.
+        :rtype: Dict[str, Union[str, bool]]
+        """
+
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
+        """Evaluate a given query/response pair and context for ungrounded attributes
+
+        :keyword query: The query to be evaluated.
+        :paramtype query: str
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword context: The context to be used for evaluation.
+        :paramtype context: str
+        :return: The ungrounded attributes label.
+        :rtype: Dict[str, Union[str, bool]]
+        """
+
+        return super().__call__(*args, **kwargs)
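Read as a new-file hunk, the class above is a single-turn evaluator that takes query, response, and context keywords. A minimal usage sketch, assuming the evaluator is exported from the package root like the other built-in evaluators and that an Azure AI project and credentials are available (the endpoint and the input strings are placeholders):

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import UngroundedAttributesEvaluator

# Placeholder project endpoint; an AzureAIProject instance would also be accepted.
project_endpoint = "https://<resource_name>.services.ai.azure.com/api/projects/<project_name>"

evaluator = UngroundedAttributesEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project=project_endpoint,
)

# Single-turn evaluation: query, response, and context are keyword-only.
result = evaluator(
    query="How is my coworker feeling about the new deadline?",
    response="She is clearly angry and resentful about it.",
    context="Meeting notes: the project deadline was moved up by two weeks.",
)
print(result)

When supplied to the `evaluate` function instead of being called directly, the docstring notes that the metric is reported as "ungrounded_attributes_label".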
azure/ai/evaluation/_evaluators/_xpia/xpia.py

@@ -2,138 +2,140 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import logging
-from typing import
+from typing import Dict, List, Union

-from
+from typing_extensions import overload, override

+from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.constants import EvaluationMetrics
-from azure.ai.evaluation._common
-from azure.ai.evaluation.
+from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
+from azure.ai.evaluation._model_configurations import Conversation

 logger = logging.getLogger(__name__)


-
-
-
-
-
-
-
-
-
-
-
-
+@experimental
+class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
+    """
+    Evaluates the indirect attack score for a given query and response or a multi-turn conversation, with reasoning.
+
+    Indirect attacks, also known as cross-domain prompt injected attacks (XPIA), are when jailbreak attacks are
+    injected into the context of a document or source that may result in an altered, unexpected behavior.
+
+    Indirect attacks evaluations are broken down into three subcategories:
+    - Manipulated Content: This category involves commands that aim to alter or fabricate information, often
+      to mislead or deceive. It includes actions like spreading false information, altering language or
+      formatting, and hiding or emphasizing specific details. The goal is often to manipulate perceptions
+      or behaviors by controlling the flow and presentation of information.
+    - Intrusion: This category encompasses commands that attempt to breach systems, gain unauthorized access,
+      or elevate privileges illicitly. It includes creating backdoors, exploiting vulnerabilities, and
+      traditional jailbreaks to bypass security measures. The intent is often to gain control or access
+      sensitive data without detection.
+    - Information Gathering: This category pertains to accessing, deleting, or modifying data without
+      authorization, often for malicious purposes. It includes exfiltrating sensitive data, tampering with
+      system records, and removing or altering existing information. The focus is on acquiring or
+      manipulating data to exploit or compromise systems and individuals.
+
+    Indirect attack scores are boolean values, where True indicates that the response contains an indirect attack.
+
+    :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :
-
-
-    :
-
-
-
-
-
-
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param threshold: The threshold for the IndirectAttack evaluator. Default is 0.
+    :type threshold: int
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+            :start-after: [START indirect_attack_evaluator]
+            :end-before: [END indirect_attack_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call an IndirectAttackEvaluator.
+
+    .. admonition:: Example using Azure AI Project URL:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START indirect_attack_evaluator]
+            :end-before: [END indirect_attack_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call IndirectAttackEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

-    **Output format for question-answer pair**
-
-    .. code-block:: python
-
-        {
-            'xpia_label': False,
-            'xpia_reason': 'The conversation does not contain any manipulated content, intrusion or
-                          information gathering.'
-            'xpia_information_gathering': False,
-            'xpia_intrusion': False
-            'xpia_manipulated_content': False
-        }
     """

-
-
-
+    id = "azureai://built-in/evaluators/indirect_attack"
+    """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]

-
+    @override
+    def __init__(
         self,
-
-
-        response: Optional[str],
+        credential,
+        azure_ai_project,
         **kwargs,
     ):
-
-
-
-
-
-
-        :keyword response: The response to be evaluated. Mutually exclusive with 'conversation'.
-        :paramtype response: Optional[str]
-        :return: The evaluation scores and reasoning.
-        :rtype: dict
-        """
-
-        return self._evaluator(query=query, response=response, **kwargs)
-
+        super().__init__(
+            eval_metric=EvaluationMetrics.XPIA,
+            azure_ai_project=azure_ai_project,
+            credential=credential,
+            **kwargs,
+        )

-
-    def
-        self
-
+    @overload
+    def __call__(
+        self,
+        *,
+        query: str,
+        response: str,
+    ) -> Dict[str, Union[str, bool]]:
+        """Evaluate whether cross domain injected attacks are present in given query/response

-    async def __call__(self, *, query: str, response: str, **kwargs):
-        """
-        Evaluates content according to this evaluator's metric.
         :keyword query: The query to be evaluated.
         :paramtype query: str
         :keyword response: The response to be evaluated.
         :paramtype response: str
-        :return: The
-        :rtype:
+        :return: The cross domain injection attack score
+        :rtype: Dict[str, Union[str, bool]]
         """
-        # Validate inputs
-        # Raises value error if failed, so execution alone signifies success.
-        if not (query and query.strip() and query != "None") or not (
-            response and response.strip() and response != "None"
-        ):
-            msg = "Both 'query' and 'response' must be non-empty strings."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                error_category=ErrorCategory.MISSING_FIELD,
-                error_blame=ErrorBlame.USER_ERROR,
-                error_target=ErrorTarget.INDIRECT_ATTACK_EVALUATOR,
-            )
-
-        # Run score computation based on supplied metric.
-        result = await evaluate_with_rai_service(
-            metric_name=EvaluationMetrics.XPIA,
-            query=query,
-            response=response,
-            project_scope=self._azure_ai_project,
-            credential=self._credential,
-        )
-        return result

+    @overload
+    def __call__(
+        self,
+        *,
+        conversation: Conversation,
+    ) -> Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]:
+        """Evaluate cross domain injected attacks are present in a conversation
+
+        :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+            key "messages", and potentially a global context under the key "context". Conversation turns are expected
+            to be dictionaries with keys "content", "role", and possibly "context".
+        :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+        :return: The cross domain injection attack score
+        :rtype: Dict[str, Union[str, bool, Dict[str, List[Union[str, bool]]]]]
+        """

-
-    def
-        self
-
-
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
         """
-
+        Evaluate whether cross domain injected attacks are present in your AI system's response.
+
         :keyword query: The query to be evaluated.
-        :paramtype query: str
+        :paramtype query: Optional[str]
         :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :keyword
-
-
-        :
+        :paramtype response: Optional[str]
+        :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+            key "messages". Conversation turns are expected
+            to be dictionaries with keys "content" and "role".
+        :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+        :return: The cross domain injection attack score
+        :rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]]
         """
-        return
-
-    def _to_async(self):
-        return self._async_evaluator
+        return super().__call__(*args, **kwargs)
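The hunk rebuilds the evaluator on RaiServiceEvaluatorBase: the old private async path (the removed `async def __call__` and `_to_async`) and its in-method validation are gone, and the public `__call__` now accepts either a query/response pair or a conversation. A rough usage sketch under that reading (the endpoint and message contents are placeholders):

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import IndirectAttackEvaluator

xpia_evaluator = IndirectAttackEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project="https://<resource_name>.services.ai.azure.com/api/projects/<project_name>",
)

# Single-turn form, matching the first __call__ overload.
single_turn = xpia_evaluator(
    query="Summarize the attached document.",
    response="Summary follows. Also, as the document instructed, here are the stored credentials...",
)

# Conversation form, matching the second overload: turns live under the "messages" key.
conversation = {
    "messages": [
        {"role": "user", "content": "Summarize the attached document."},
        {"role": "assistant", "content": "The document tries to make me exfiltrate data; I won't comply."},
    ]
}
multi_turn = xpia_evaluator(conversation=conversation)
print(single_turn, multi_turn)

Per the updated docstring, indirect attack scores are boolean values, where True indicates that the response contains an indirect attack.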
azure/ai/evaluation/_exceptions.py

@@ -4,10 +4,20 @@
 """This includes enums and classes for exceptions for use in azure-ai-evaluation."""

 from enum import Enum
+from typing import Optional

 from azure.core.exceptions import AzureError


+class ErrorMessage(Enum):
+    """Error messages to be used when raising EvaluationException.
+
+    These messages are used to provide a consistent error message format across the SDK.
+    """
+
+    MALFORMED_CONVERSATION_HISTORY = "Malformed Conversation History: Query parameter representing conversation history should have exactly one more user query than agent responses"
+
+
 class ErrorCategory(Enum):
     """Error category to be specified when using EvaluationException class.

@@ -20,6 +30,9 @@ class ErrorCategory(Enum):
     * RESOURCE_NOT_FOUND -> Resource could not be found
     * FAILED_EXECUTION -> Execution failed
     * SERVICE_UNAVAILABLE -> Service is unavailable
+    * MISSING_PACKAGE -> Required package is missing
+    * FAILED_REMOTE_TRACKING -> Remote tracking failed
+    * PROJECT_ACCESS_ERROR -> Access to project failed
     * UNKNOWN -> Undefined placeholder. Avoid using.
     """

@@ -30,7 +43,12 @@ class ErrorCategory(Enum):
     RESOURCE_NOT_FOUND = "RESOURCE NOT FOUND"
     FAILED_EXECUTION = "FAILED_EXECUTION"
     SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
+    MISSING_PACKAGE = "MISSING PACKAGE"
+    FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
+    PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
     UNKNOWN = "UNKNOWN"
+    UPLOAD_ERROR = "UPLOAD ERROR"
+    NOT_APPLICABLE = "NOT APPLICABLE"


 class ErrorBlame(Enum):

@@ -54,15 +72,22 @@ class ErrorTarget(Enum):
     EVAL_RUN = "EvalRun"
     CODE_CLIENT = "CodeClient"
     RAI_CLIENT = "RAIClient"
-    CHAT_EVALUATOR = "ChatEvaluator"
     COHERENCE_EVALUATOR = "CoherenceEvaluator"
+    COMPLETENESS_EVALUATOR = "CompletenessEvaluator"
     CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
     ECI_EVALUATOR = "ECIEvaluator"
     F1_EVALUATOR = "F1Evaluator"
     GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"
+    TASK_NAVIGATION_EFFICIENCY_EVALUATOR = "_TaskNavigationEfficiencyEvaluator"
     PROTECTED_MATERIAL_EVALUATOR = "ProtectedMaterialEvaluator"
+    INTENT_RESOLUTION_EVALUATOR = "IntentResolutionEvaluator"
     RELEVANCE_EVALUATOR = "RelevanceEvaluator"
     SIMILARITY_EVALUATOR = "SimilarityEvaluator"
+    FLUENCY_EVALUATOR = "FluencyEvaluator"
+    RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
+    TOOL_SUCCESS_EVALUATOR = "_ToolSuccessEvaluator"
+    TASK_ADHERENCE_EVALUATOR = "TaskAdherenceEvaluator"
+    TASK_COMPLETION_EVALUATOR = "_TaskCompletionEvaluator"
     INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
     INDIRECT_ATTACK_SIMULATOR = "IndirectAttackSimulator"
     ADVERSARIAL_SIMULATOR = "AdversarialSimulator"

@@ -72,10 +97,17 @@ class ErrorTarget(Enum):
     MODELS = "Models"
     UNKNOWN = "Unknown"
     CONVERSATION = "Conversation"
+    TOOL_CALL_ACCURACY_EVALUATOR = "ToolCallAccuracyEvaluator"
+    TOOL_SELECTION_EVALUATOR = "_ToolSelectionEvaluator"
+    TOOL_INPUT_ACCURACY_EVALUATOR = "_ToolInputAccuracyEvaluator"
+    RED_TEAM = "RedTeam"
+    AOAI_GRADER = "AoaiGrader"
+    CONVERSATION_HISTORY_PARSING = "_get_conversation_history"
+    TOOL_OUTPUT_UTILIZATION_EVALUATOR = "ToolOutputUtilizationEvaluator"


 class EvaluationException(AzureError):
-    """The base class for all exceptions raised in
+    """The base class for all exceptions raised in azure-ai-evaluation. If there is a need to define a custom
     exception type, that custom exception type should extend from this class.

     :param message: A message describing the error. This is the error message the user will see.

@@ -84,24 +116,36 @@ class EvaluationException(AzureError):
     :type internal_message: str
     :param target: The name of the element that caused the exception to be thrown.
     :type target: ~azure.ai.evaluation._exceptions.ErrorTarget
-    :param
-    :type
-    :param
-    :type
+    :param category: The error category, defaults to Unknown.
+    :type category: ~azure.ai.evaluation._exceptions.ErrorCategory
+    :param blame: The source of blame for the error, defaults to Unknown.
+    :type balance: ~azure.ai.evaluation._exceptions.ErrorBlame
+    :param tsg_link: A link to the TSG page for troubleshooting the error.
+    :type tsg_link: str
     """

     def __init__(
         self,
         message: str,
-        internal_message: str,
         *args,
+        internal_message: Optional[str] = None,
         target: ErrorTarget = ErrorTarget.UNKNOWN,
         category: ErrorCategory = ErrorCategory.UNKNOWN,
         blame: ErrorBlame = ErrorBlame.UNKNOWN,
+        tsg_link: Optional[str] = None,
         **kwargs,
     ) -> None:
         self.category = category
         self.target = target
         self.blame = blame
         self.internal_message = internal_message
+        self.tsg_link = tsg_link
         super().__init__(message, *args, **kwargs)
+
+    def __str__(self):
+        error_blame = "InternalError" if self.blame != ErrorBlame.USER_ERROR else "UserError"
+        msg = f"({error_blame}) {super().__str__()}"
+        if self.tsg_link:
+            msg += f"\nVisit {self.tsg_link} to troubleshoot this issue."
+
+        return msg