azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +85 -14
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/_common/constants.py +124 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +578 -69
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +505 -27
- azure/ai/evaluation/_constants.py +147 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +87 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
- azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
- azure/ai/evaluation/_evaluate/_utils.py +237 -42
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
- azure/ai/evaluation/_exceptions.py +24 -1
- azure/ai/evaluation/_http_utils.py +7 -5
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
- azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- azure/ai/evaluation/simulator/_simulator.py +43 -19
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
- azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
-
from concurrent.futures import as_completed
|
|
5
|
-
from typing import Callable, Dict, List, Union
|
|
4
|
+
from typing import Dict, List, Union
|
|
6
5
|
|
|
7
|
-
from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
|
|
8
6
|
from typing_extensions import overload, override
|
|
9
7
|
|
|
10
|
-
from azure.ai.evaluation._evaluators._common import EvaluatorBase
|
|
8
|
+
from azure.ai.evaluation._evaluators._common import MultiEvaluatorBase
|
|
11
9
|
from azure.ai.evaluation._model_configurations import Conversation
|
|
12
10
|
from azure.ai.evaluation._common._experimental import experimental
|
|
13
11
|
|
|
@@ -18,15 +16,25 @@ from ._violence import ViolenceEvaluator
|
|
|
18
16
|
|
|
19
17
|
|
|
20
18
|
@experimental
|
|
21
|
-
class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
|
|
19
|
+
class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
|
|
22
20
|
"""
|
|
23
|
-
Initialize a content safety evaluator configured to evaluate content
|
|
21
|
+
Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.
|
|
24
22
|
|
|
25
23
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
26
24
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
27
|
-
:param azure_ai_project: The
|
|
28
|
-
It contains subscription id, resource group, and project name.
|
|
29
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
25
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
26
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
27
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
28
|
+
:param violence_threshold: The threshold for violence evaluation. Default is 3.
|
|
29
|
+
:type violence_threshold: int
|
|
30
|
+
:param sexual_threshold: The threshold for sexual content evaluation. Default is 3.
|
|
31
|
+
:type sexual_threshold: int
|
|
32
|
+
:param self_harm_threshold: The threshold for self-harm evaluation. Default is 3.
|
|
33
|
+
:type self_harm_threshold: int
|
|
34
|
+
:param hate_unfairness_threshold: The threshold for hate/unfairness evaluation. Default is 3.
|
|
35
|
+
:type hate_unfairness_threshold: int
|
|
36
|
+
:param evaluate_query: Whether to also evaluate the query in addition to the response. Default is False.
|
|
37
|
+
:type evaluate_query: bool
|
|
30
38
|
:param kwargs: Additional arguments to pass to the evaluator.
|
|
31
39
|
:type kwargs: Any
|
|
32
40
|
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
|
|
@@ -38,22 +46,67 @@ class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
|
|
|
38
46
|
:end-before: [END content_safety_evaluator]
|
|
39
47
|
:language: python
|
|
40
48
|
:dedent: 8
|
|
41
|
-
:caption: Initialize and call
|
|
49
|
+
:caption: Initialize and call ContentSafetyEvaluator using azure.ai.evaluation.AzureAIProject.
|
|
50
|
+
|
|
51
|
+
.. admonition:: Example using Azure AI Project URL:
|
|
52
|
+
|
|
53
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
54
|
+
:start-after: [START content_safety_evaluator]
|
|
55
|
+
:end-before: [END content_safety_evaluator]
|
|
56
|
+
:language: python
|
|
57
|
+
:dedent: 8
|
|
58
|
+
:caption: Initialize and call ContentSafetyEvaluator using Azure AI Project URL in the following format
|
|
59
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
60
|
+
|
|
61
|
+
.. admonition:: Example with Threshold:
|
|
62
|
+
|
|
63
|
+
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
64
|
+
:start-after: [START threshold_content_safety_evaluator]
|
|
65
|
+
:end-before: [END threshold_content_safety_evaluator]
|
|
66
|
+
:language: python
|
|
67
|
+
:dedent: 8
|
|
68
|
+
:caption: Initialize with threshold and call a ContentSafetyEvaluator with a query and response.
|
|
42
69
|
"""
|
|
43
70
|
|
|
44
|
-
id = "content_safety"
|
|
71
|
+
id = "azureai://built-in/evaluators/content_safety"
|
|
45
72
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
73
|
+
_OPTIONAL_PARAMS = ["query"]
|
|
46
74
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
75
|
+
def __init__(
|
|
76
|
+
self,
|
|
77
|
+
credential,
|
|
78
|
+
azure_ai_project,
|
|
79
|
+
*,
|
|
80
|
+
violence_threshold: int = 3,
|
|
81
|
+
sexual_threshold: int = 3,
|
|
82
|
+
self_harm_threshold: int = 3,
|
|
83
|
+
hate_unfairness_threshold: int = 3,
|
|
84
|
+
**kwargs,
|
|
85
|
+
):
|
|
86
|
+
# Type checking
|
|
87
|
+
for name, value in [
|
|
88
|
+
("violence_threshold", violence_threshold),
|
|
89
|
+
("sexual_threshold", sexual_threshold),
|
|
90
|
+
("self_harm_threshold", self_harm_threshold),
|
|
91
|
+
("hate_unfairness_threshold", hate_unfairness_threshold),
|
|
92
|
+
]:
|
|
93
|
+
if not isinstance(value, int):
|
|
94
|
+
raise TypeError(f"{name} must be an int, got {type(value)}")
|
|
95
|
+
|
|
96
|
+
# Extract evaluate_query from kwargs if present
|
|
97
|
+
evaluate_query_kwargs = {}
|
|
98
|
+
if "evaluate_query" in kwargs:
|
|
99
|
+
evaluate_query_kwargs["evaluate_query"] = kwargs["evaluate_query"]
|
|
100
|
+
|
|
101
|
+
evaluators = [
|
|
102
|
+
ViolenceEvaluator(credential, azure_ai_project, threshold=violence_threshold, **evaluate_query_kwargs),
|
|
103
|
+
SexualEvaluator(credential, azure_ai_project, threshold=sexual_threshold, **evaluate_query_kwargs),
|
|
104
|
+
SelfHarmEvaluator(credential, azure_ai_project, threshold=self_harm_threshold, **evaluate_query_kwargs),
|
|
105
|
+
HateUnfairnessEvaluator(
|
|
106
|
+
credential, azure_ai_project, threshold=hate_unfairness_threshold, **evaluate_query_kwargs
|
|
107
|
+
),
|
|
56
108
|
]
|
|
109
|
+
super().__init__(evaluators=evaluators, **kwargs)
|
|
57
110
|
|
|
58
111
|
@overload
|
|
59
112
|
def __call__(
|
|
@@ -109,36 +162,3 @@ class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
|
|
|
109
162
|
:rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[float, Dict[str, List[Union[str, float]]]]]]
|
|
110
163
|
"""
|
|
111
164
|
return super().__call__(*args, **kwargs)
|
|
112
|
-
|
|
113
|
-
@override
|
|
114
|
-
async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:
|
|
115
|
-
"""Perform the evaluation using the Azure AI RAI service.
|
|
116
|
-
The exact evaluation performed is determined by the evaluation metric supplied
|
|
117
|
-
by the child class initializer.
|
|
118
|
-
|
|
119
|
-
:param eval_input: The input to the evaluation function.
|
|
120
|
-
:type eval_input: Dict
|
|
121
|
-
:return: The evaluation result.
|
|
122
|
-
:rtype: Dict
|
|
123
|
-
"""
|
|
124
|
-
query = eval_input.get("query", None)
|
|
125
|
-
response = eval_input.get("response", None)
|
|
126
|
-
conversation = eval_input.get("conversation", None)
|
|
127
|
-
results: Dict[str, Union[str, float]] = {}
|
|
128
|
-
# TODO fix this to not explode on empty optional inputs (PF SKD error)
|
|
129
|
-
if self._parallel:
|
|
130
|
-
with ThreadPoolExecutor() as executor:
|
|
131
|
-
# pylint: disable=no-value-for-parameter
|
|
132
|
-
futures = {
|
|
133
|
-
executor.submit(query=query, response=response, conversation=conversation): evaluator
|
|
134
|
-
for evaluator in self._evaluators
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
for future in as_completed(futures):
|
|
138
|
-
results.update(future.result())
|
|
139
|
-
else:
|
|
140
|
-
for evaluator in self._evaluators:
|
|
141
|
-
result = evaluator(query=query, response=response, conversation=conversation)
|
|
142
|
-
results.update(result)
|
|
143
|
-
|
|
144
|
-
return results
|
|
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
9
9
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
10
10
|
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
11
11
|
from azure.ai.evaluation._model_configurations import Conversation
|
|
12
|
+
from azure.ai.evaluation._constants import _AggregationType
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@experimental
|
|
@@ -44,9 +45,11 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
44
45
|
|
|
45
46
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
46
47
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
47
|
-
:param azure_ai_project: The
|
|
48
|
-
It contains subscription id, resource group, and project name.
|
|
49
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
48
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
49
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
50
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
51
|
+
:param threshold: The threshold for the HateUnfairness evaluator. Default is 3.
|
|
52
|
+
:type threshold: int
|
|
50
53
|
|
|
51
54
|
.. admonition:: Example:
|
|
52
55
|
|
|
@@ -55,22 +58,49 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
55
58
|
:end-before: [END hate_unfairness_evaluator]
|
|
56
59
|
:language: python
|
|
57
60
|
:dedent: 8
|
|
58
|
-
:caption: Initialize and call
|
|
61
|
+
:caption: Initialize and call HateUnfairnessEvaluator using azure.ai.evaluation.AzureAIProject.
|
|
62
|
+
|
|
63
|
+
.. admonition:: Example using Azure AI Project URL:
|
|
64
|
+
|
|
65
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
66
|
+
:start-after: [START hate_unfairness_evaluator]
|
|
67
|
+
:end-before: [END hate_unfairness_evaluator]
|
|
68
|
+
:language: python
|
|
69
|
+
:dedent: 8
|
|
70
|
+
:caption: Initialize and call HateUnfairnessEvaluator using Azure AI Project URL in the following format
|
|
71
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
72
|
+
|
|
73
|
+
.. admonition:: Example with Threshold:
|
|
74
|
+
|
|
75
|
+
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
76
|
+
:start-after: [START threshold_hate_unfairness_evaluator]
|
|
77
|
+
:end-before: [END threshold_hate_unfairness_evaluator]
|
|
78
|
+
:language: python
|
|
79
|
+
:dedent: 8
|
|
80
|
+
:caption: Initialize with threshold and call a HateUnfairnessEvaluator with a query and response.
|
|
59
81
|
"""
|
|
60
82
|
|
|
61
|
-
id = "
|
|
83
|
+
id = "azureai://built-in/evaluators/hate_unfairness"
|
|
62
84
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
85
|
+
_OPTIONAL_PARAMS = ["query"]
|
|
63
86
|
|
|
64
87
|
@override
|
|
65
88
|
def __init__(
|
|
66
89
|
self,
|
|
67
90
|
credential,
|
|
68
91
|
azure_ai_project,
|
|
92
|
+
*,
|
|
93
|
+
threshold: int = 3,
|
|
94
|
+
**kwargs,
|
|
69
95
|
):
|
|
70
96
|
super().__init__(
|
|
71
97
|
eval_metric=EvaluationMetrics.HATE_FAIRNESS,
|
|
72
98
|
azure_ai_project=azure_ai_project,
|
|
73
99
|
credential=credential,
|
|
100
|
+
conversation_aggregation_type=_AggregationType.MAX,
|
|
101
|
+
threshold=threshold,
|
|
102
|
+
_higher_is_better=False,
|
|
103
|
+
**kwargs,
|
|
74
104
|
)
|
|
75
105
|
|
|
76
106
|
@overload
|
|
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
9
9
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
10
10
|
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
11
11
|
from azure.ai.evaluation._model_configurations import Conversation
|
|
12
|
+
from azure.ai.evaluation._constants import _AggregationType
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@experimental
|
|
@@ -38,9 +39,11 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
38
39
|
|
|
39
40
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
40
41
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
41
|
-
:param azure_ai_project: The
|
|
42
|
-
It contains subscription id, resource group, and project name.
|
|
43
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
42
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
43
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
44
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
45
|
+
:param threshold: The threshold for the SelfHarm evaluator. Default is 3.
|
|
46
|
+
:type threshold: int
|
|
44
47
|
|
|
45
48
|
.. admonition:: Example:
|
|
46
49
|
|
|
@@ -49,22 +52,40 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
49
52
|
:end-before: [END self_harm_evaluator]
|
|
50
53
|
:language: python
|
|
51
54
|
:dedent: 8
|
|
52
|
-
:caption: Initialize and call
|
|
55
|
+
:caption: Initialize and call SelfHarmEvaluator using azure.ai.evaluation.AzureAIProject.
|
|
56
|
+
|
|
57
|
+
.. admonition:: Example using Azure AI Project URL:
|
|
58
|
+
|
|
59
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
60
|
+
:start-after: [START self_harm_evaluator]
|
|
61
|
+
:end-before: [END self_harm_evaluator]
|
|
62
|
+
:language: python
|
|
63
|
+
:dedent: 8
|
|
64
|
+
:caption: Initialize and call SelfHarmEvaluator using Azure AI Project URL in the following format
|
|
65
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
53
66
|
"""
|
|
54
67
|
|
|
55
|
-
id = "
|
|
68
|
+
id = "azureai://built-in/evaluators/self_harm"
|
|
56
69
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
70
|
+
_OPTIONAL_PARAMS = ["query"]
|
|
57
71
|
|
|
58
72
|
@override
|
|
59
73
|
def __init__(
|
|
60
74
|
self,
|
|
61
75
|
credential,
|
|
62
76
|
azure_ai_project,
|
|
77
|
+
*,
|
|
78
|
+
threshold: int = 3,
|
|
79
|
+
**kwargs,
|
|
63
80
|
):
|
|
64
81
|
super().__init__(
|
|
65
82
|
eval_metric=EvaluationMetrics.SELF_HARM,
|
|
66
83
|
azure_ai_project=azure_ai_project,
|
|
67
84
|
credential=credential,
|
|
85
|
+
conversation_aggregation_type=_AggregationType.MAX,
|
|
86
|
+
threshold=threshold,
|
|
87
|
+
_higher_is_better=False,
|
|
88
|
+
**kwargs,
|
|
68
89
|
)
|
|
69
90
|
|
|
70
91
|
@overload
|
|
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
9
9
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
10
10
|
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
11
11
|
from azure.ai.evaluation._model_configurations import Conversation
|
|
12
|
+
from azure.ai.evaluation._constants import _AggregationType
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@experimental
|
|
@@ -40,9 +41,11 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
40
41
|
|
|
41
42
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
42
43
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
43
|
-
:param azure_ai_project: The
|
|
44
|
-
It contains subscription id, resource group, and project name.
|
|
45
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
44
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
45
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
46
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
47
|
+
:param threshold: The threshold for the Sexual evaluator. Default is 3.
|
|
48
|
+
:type threshold: int
|
|
46
49
|
|
|
47
50
|
.. admonition:: Example:
|
|
48
51
|
|
|
@@ -52,21 +55,48 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
52
55
|
:language: python
|
|
53
56
|
:dedent: 8
|
|
54
57
|
:caption: Initialize and call a SexualEvaluator.
|
|
58
|
+
|
|
59
|
+
.. admonition:: Example using Azure AI Project URL:
|
|
60
|
+
|
|
61
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
62
|
+
:start-after: [START sexual_evaluator]
|
|
63
|
+
:end-before: [END sexual_evaluator]
|
|
64
|
+
:language: python
|
|
65
|
+
:dedent: 8
|
|
66
|
+
:caption: Initialize and call SexualEvaluator using Azure AI Project URL in the following format
|
|
67
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
68
|
+
|
|
69
|
+
.. admonition:: Example with Threshold:
|
|
70
|
+
|
|
71
|
+
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
72
|
+
:start-after: [START threshold_sexual_evaluator]
|
|
73
|
+
:end-before: [END threshold_sexual_evaluator]
|
|
74
|
+
:language: python
|
|
75
|
+
:dedent: 8
|
|
76
|
+
:caption: Initialize with threshold and call a SexualEvaluator.
|
|
55
77
|
"""
|
|
56
78
|
|
|
57
|
-
id = "
|
|
79
|
+
id = "azureai://built-in/evaluators/sexual"
|
|
58
80
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
81
|
+
_OPTIONAL_PARAMS = ["query"]
|
|
59
82
|
|
|
60
83
|
@override
|
|
61
84
|
def __init__(
|
|
62
85
|
self,
|
|
63
86
|
credential,
|
|
64
87
|
azure_ai_project,
|
|
88
|
+
*,
|
|
89
|
+
threshold: int = 3,
|
|
90
|
+
**kwargs,
|
|
65
91
|
):
|
|
66
92
|
super().__init__(
|
|
67
93
|
eval_metric=EvaluationMetrics.SEXUAL,
|
|
68
94
|
azure_ai_project=azure_ai_project,
|
|
69
95
|
credential=credential,
|
|
96
|
+
conversation_aggregation_type=_AggregationType.MAX,
|
|
97
|
+
threshold=threshold,
|
|
98
|
+
_higher_is_better=False,
|
|
99
|
+
**kwargs,
|
|
70
100
|
)
|
|
71
101
|
|
|
72
102
|
@overload
|
|
@@ -119,7 +149,7 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
119
149
|
key "messages". Conversation turns are expected
|
|
120
150
|
to be dictionaries with keys "content" and "role".
|
|
121
151
|
:paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
|
|
122
|
-
:return: The
|
|
152
|
+
:return: The sexual score.
|
|
123
153
|
:rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[str, float, Dict[str, List[Union[str, float]]]]]]
|
|
124
154
|
"""
|
|
125
155
|
return super().__call__(*args, **kwargs)
|
|
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
9
9
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
10
10
|
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
11
11
|
from azure.ai.evaluation._model_configurations import Conversation
|
|
12
|
+
from azure.ai.evaluation._constants import _AggregationType
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@experimental
|
|
@@ -40,9 +41,11 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
40
41
|
|
|
41
42
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
42
43
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
43
|
-
:param azure_ai_project: The
|
|
44
|
-
It contains subscription id, resource group, and project name.
|
|
45
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
44
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
45
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
46
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
47
|
+
:param threshold: The threshold for the Violence evaluator. Default is 3.
|
|
48
|
+
:type threshold: int
|
|
46
49
|
|
|
47
50
|
.. admonition:: Example:
|
|
48
51
|
|
|
@@ -52,21 +55,48 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
52
55
|
:language: python
|
|
53
56
|
:dedent: 8
|
|
54
57
|
:caption: Initialize and call a ViolenceEvaluator.
|
|
58
|
+
|
|
59
|
+
.. admonition:: Example using Azure AI Project URL:
|
|
60
|
+
|
|
61
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
62
|
+
:start-after: [START violence_evaluator]
|
|
63
|
+
:end-before: [END violence_evaluator]
|
|
64
|
+
:language: python
|
|
65
|
+
:dedent: 8
|
|
66
|
+
:caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in the following format
|
|
67
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
68
|
+
|
|
69
|
+
.. admonition:: Example with Threshold:
|
|
70
|
+
|
|
71
|
+
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
72
|
+
:start-after: [START threshold_violence_evaluator]
|
|
73
|
+
:end-before: [END threshold_violence_evaluator]
|
|
74
|
+
:language: python
|
|
75
|
+
:dedent: 8
|
|
76
|
+
:caption: Initialize with threshold and call a ViolenceEvaluator.
|
|
55
77
|
"""
|
|
56
78
|
|
|
57
|
-
id = "
|
|
79
|
+
id = "azureai://built-in/evaluators/violence"
|
|
58
80
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
81
|
+
_OPTIONAL_PARAMS = ["query"]
|
|
59
82
|
|
|
60
83
|
@override
|
|
61
84
|
def __init__(
|
|
62
85
|
self,
|
|
63
86
|
credential,
|
|
64
87
|
azure_ai_project,
|
|
88
|
+
*,
|
|
89
|
+
threshold: int = 3,
|
|
90
|
+
**kwargs,
|
|
65
91
|
):
|
|
66
92
|
super().__init__(
|
|
67
93
|
eval_metric=EvaluationMetrics.VIOLENCE,
|
|
68
94
|
azure_ai_project=azure_ai_project,
|
|
69
95
|
credential=credential,
|
|
96
|
+
conversation_aggregation_type=_AggregationType.MAX,
|
|
97
|
+
threshold=threshold,
|
|
98
|
+
_higher_is_better=False,
|
|
99
|
+
**kwargs,
|
|
70
100
|
)
|
|
71
101
|
|
|
72
102
|
@overload
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from ._document_retrieval import DocumentRetrievalEvaluator, RetrievalGroundTruthDocument, RetrievedDocument
|
|
6
|
+
|
|
7
|
+
__all__ = ["DocumentRetrievalEvaluator", "RetrievalGroundTruthDocument", "RetrievedDocument"]
|