azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +13 -2
- azure/ai/evaluation/_aoai/__init__.py +1 -1
- azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
- azure/ai/evaluation/_aoai/label_grader.py +3 -2
- azure/ai/evaluation/_aoai/score_model_grader.py +90 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
- azure/ai/evaluation/_azure/_envs.py +9 -10
- azure/ai/evaluation/_azure/_token_manager.py +7 -1
- azure/ai/evaluation/_common/constants.py +11 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
- azure/ai/evaluation/_common/onedp/__init__.py +32 -32
- azure/ai/evaluation/_common/onedp/_client.py +136 -139
- azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
- azure/ai/evaluation/_common/onedp/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -50
- azure/ai/evaluation/_common/onedp/_version.py +9 -9
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
- azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
- azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
- azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/rai_service.py +86 -50
- azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
- azure/ai/evaluation/_common/utils.py +124 -3
- azure/ai/evaluation/_constants.py +2 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +9 -8
- azure/ai/evaluation/_converters/_models.py +46 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +2 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +4 -4
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +60 -54
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +130 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
- azure/ai/evaluation/_evaluate/_utils.py +24 -15
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +3 -3
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +12 -11
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -5
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +15 -5
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +6 -1
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +13 -13
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +6 -6
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +34 -64
- azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +4 -4
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -2
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +3 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -7
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +30 -25
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +2 -3
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +6 -6
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -4
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +8 -13
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -25
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +4 -4
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +21 -21
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +5 -5
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -3
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -14
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +43 -34
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +3 -3
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +12 -11
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +6 -6
- azure/ai/evaluation/_exceptions.py +10 -0
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +5 -10
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +193 -111
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +3 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +1 -1
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
- azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
- azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
- azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
- azure/ai/evaluation/red_team/_default_converter.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +1286 -739
- azure/ai/evaluation/red_team/_red_team_result.py +43 -38
- azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +32 -32
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
- azure/ai/evaluation/red_team/_utils/constants.py +2 -12
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
- azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +31 -4
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +26 -15
- azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +5 -5
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +10 -8
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +9 -8
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/METADATA +15 -1
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/RECORD +135 -131
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/top_level.txt +0 -0
|
@@ -8,15 +8,16 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
8
8
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
9
9
|
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
10
10
|
|
|
11
|
+
|
|
11
12
|
# cspell:ignore ssrf, vuln
|
|
12
13
|
@experimental
|
|
13
14
|
class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
14
15
|
"""
|
|
15
|
-
Evaluates code vulnerability for a given query and response for a single-turn evaluation only,
|
|
16
|
+
Evaluates code vulnerability for a given query and response for a single-turn evaluation only,
|
|
16
17
|
where query represents the user query or code before the completion, and response represents the code recommended by the assistant.
|
|
17
18
|
|
|
18
19
|
The code vulnerability evaluation checks for vulnerabilities in the following coding languages:
|
|
19
|
-
|
|
20
|
+
|
|
20
21
|
- Python
|
|
21
22
|
- Java
|
|
22
23
|
- C++
|
|
@@ -26,7 +27,7 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
26
27
|
- SQL
|
|
27
28
|
|
|
28
29
|
The code vulnerability evaluation identifies the following vulnerabilities:
|
|
29
|
-
|
|
30
|
+
|
|
30
31
|
- path-injection
|
|
31
32
|
- sql-injection
|
|
32
33
|
- code-injection
|
|
@@ -49,9 +50,9 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
49
50
|
|
|
50
51
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
51
52
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
52
|
-
:param azure_ai_project: The
|
|
53
|
-
It contains subscription id, resource group, and project name.
|
|
54
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
53
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
54
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
55
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
55
56
|
:param kwargs: Additional arguments to pass to the evaluator.
|
|
56
57
|
:type kwargs: Any
|
|
57
58
|
|
|
@@ -63,13 +64,13 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
63
64
|
:language: python
|
|
64
65
|
:dedent: 8
|
|
65
66
|
:caption: Initialize and call CodeVulnerabilityEvaluator with a query and response using azure.ai.evaluation.AzureAIProject.
|
|
66
|
-
|
|
67
|
+
|
|
67
68
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
68
69
|
:start-after: [START code_vulnerability_evaluator]
|
|
69
70
|
:end-before: [END code_vulnerability_evaluator]
|
|
70
71
|
:language: python
|
|
71
72
|
:dedent: 8
|
|
72
|
-
:caption: Initialize and call CodeVulnerabilityEvaluator using Azure AI Project URL in following format
|
|
73
|
+
:caption: Initialize and call CodeVulnerabilityEvaluator using Azure AI Project URL in following format
|
|
73
74
|
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
74
75
|
|
|
75
76
|
.. note::
|
|
@@ -99,7 +100,7 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
99
100
|
*,
|
|
100
101
|
query: str,
|
|
101
102
|
response: str,
|
|
102
|
-
) -> Dict[str, Union[str, float]]:
|
|
103
|
+
) -> Dict[str, Union[str, float]]:
|
|
103
104
|
"""Evaluate a given query/response pair for code vulnerability
|
|
104
105
|
|
|
105
106
|
:keyword query: The query to be evaluated.
|
|
@@ -116,7 +117,7 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
116
117
|
*args,
|
|
117
118
|
**kwargs,
|
|
118
119
|
):
|
|
119
|
-
"""Evaluate code vulnerability. Accepts query and response for a single-turn evaluation only.
|
|
120
|
+
"""Evaluate code vulnerability. Accepts query and response for a single-turn evaluation only.
|
|
120
121
|
|
|
121
122
|
:keyword query: The query to be evaluated.
|
|
122
123
|
:paramtype query: Optional[str]
|
|
@@ -124,5 +125,5 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
124
125
|
:paramtype response: Optional[str]
|
|
125
126
|
:rtype: Dict[str, Union[str, bool]]
|
|
126
127
|
"""
|
|
127
|
-
|
|
128
|
+
|
|
128
129
|
return super().__call__(*args, **kwargs)
|
|
@@ -32,17 +32,17 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
|
|
|
32
32
|
:language: python
|
|
33
33
|
:dedent: 8
|
|
34
34
|
:caption: Initialize and call CoherenceEvaluator using azure.ai.evaluation.AzureAIProject
|
|
35
|
-
|
|
35
|
+
|
|
36
36
|
.. admonition:: Example using Azure AI Project URL:
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
39
39
|
:start-after: [START coherence_evaluator]
|
|
40
40
|
:end-before: [END coherence_evaluator]
|
|
41
41
|
:language: python
|
|
42
42
|
:dedent: 8
|
|
43
|
-
:caption: Initialize and call CoherenceEvaluator using Azure AI Project URL in following format
|
|
43
|
+
:caption: Initialize and call CoherenceEvaluator using Azure AI Project URL in following format
|
|
44
44
|
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
.. admonition:: Example with Threshold:
|
|
47
47
|
|
|
48
48
|
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
@@ -76,7 +76,7 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
|
|
|
76
76
|
prompty_file=prompty_path,
|
|
77
77
|
result_key=self._RESULT_KEY,
|
|
78
78
|
threshold=threshold,
|
|
79
|
-
_higher_is_better=self._higher_is_better
|
|
79
|
+
_higher_is_better=self._higher_is_better,
|
|
80
80
|
)
|
|
81
81
|
|
|
82
82
|
@overload
|
|
@@ -416,12 +416,12 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
|
|
|
416
416
|
threshold_key = f"{base_key}_threshold"
|
|
417
417
|
result[threshold_key] = self._threshold
|
|
418
418
|
if self._higher_is_better:
|
|
419
|
-
if
|
|
419
|
+
if float(score_value) >= self._threshold:
|
|
420
420
|
result[result_key] = EVALUATION_PASS_FAIL_MAPPING[True]
|
|
421
421
|
else:
|
|
422
422
|
result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
|
|
423
423
|
else:
|
|
424
|
-
if
|
|
424
|
+
if float(score_value) <= self._threshold:
|
|
425
425
|
result[result_key] = EVALUATION_PASS_FAIL_MAPPING[True]
|
|
426
426
|
else:
|
|
427
427
|
result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
|
|
@@ -498,9 +498,19 @@ class AsyncEvaluatorBase:
|
|
|
498
498
|
# Since we want this to be relatively call-agnostic, we just account for every input that any children
|
|
499
499
|
# are known to throw at this, mash them into kwargs, and then pass them into the real call.
|
|
500
500
|
async def __call__(
|
|
501
|
-
self,
|
|
502
|
-
|
|
503
|
-
|
|
501
|
+
self,
|
|
502
|
+
*,
|
|
503
|
+
query=None,
|
|
504
|
+
response=None,
|
|
505
|
+
context=None,
|
|
506
|
+
conversation=None,
|
|
507
|
+
ground_truth=None,
|
|
508
|
+
tool_calls=None,
|
|
509
|
+
tool_definitions=None,
|
|
510
|
+
messages=None,
|
|
511
|
+
retrieval_ground_truth=None,
|
|
512
|
+
retrieved_documents=None,
|
|
513
|
+
**kwargs,
|
|
504
514
|
):
|
|
505
515
|
if conversation is not None:
|
|
506
516
|
kwargs["conversation"] = conversation
|
|
@@ -20,9 +20,14 @@ from ..._common.utils import construct_prompty_model_config, validate_model_conf
|
|
|
20
20
|
from . import EvaluatorBase
|
|
21
21
|
|
|
22
22
|
try:
|
|
23
|
-
from ..._user_agent import
|
|
23
|
+
from ..._user_agent import UserAgentSingleton
|
|
24
24
|
except ImportError:
|
|
25
|
-
|
|
25
|
+
|
|
26
|
+
class UserAgentSingleton:
|
|
27
|
+
@property
|
|
28
|
+
def value(self) -> str:
|
|
29
|
+
return "None"
|
|
30
|
+
|
|
26
31
|
|
|
27
32
|
T = TypeVar("T")
|
|
28
33
|
|
|
@@ -50,8 +55,17 @@ class PromptyEvaluatorBase(EvaluatorBase[T]):
|
|
|
50
55
|
_LLM_CALL_TIMEOUT = 600
|
|
51
56
|
_DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
|
|
52
57
|
|
|
53
|
-
def __init__(
|
|
54
|
-
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
*,
|
|
61
|
+
result_key: str,
|
|
62
|
+
prompty_file: str,
|
|
63
|
+
model_config: dict,
|
|
64
|
+
eval_last_turn: bool = False,
|
|
65
|
+
threshold: int = 3,
|
|
66
|
+
_higher_is_better: bool = False,
|
|
67
|
+
**kwargs,
|
|
68
|
+
) -> None:
|
|
55
69
|
self._result_key = result_key
|
|
56
70
|
self._is_reasoning_model = kwargs.get("is_reasoning_model", False)
|
|
57
71
|
self._prompty_file = prompty_file
|
|
@@ -60,15 +74,16 @@ class PromptyEvaluatorBase(EvaluatorBase[T]):
|
|
|
60
74
|
super().__init__(eval_last_turn=eval_last_turn, threshold=threshold, _higher_is_better=_higher_is_better)
|
|
61
75
|
|
|
62
76
|
subclass_name = self.__class__.__name__
|
|
63
|
-
user_agent = f"{
|
|
77
|
+
user_agent = f"{UserAgentSingleton().value} (type=evaluator subtype={subclass_name})"
|
|
64
78
|
prompty_model_config = construct_prompty_model_config(
|
|
65
79
|
validate_model_config(model_config),
|
|
66
80
|
self._DEFAULT_OPEN_API_VERSION,
|
|
67
81
|
user_agent,
|
|
68
82
|
)
|
|
69
83
|
|
|
70
|
-
self._flow = AsyncPrompty.load(
|
|
71
|
-
|
|
84
|
+
self._flow = AsyncPrompty.load(
|
|
85
|
+
source=self._prompty_file, model=prompty_model_config, is_reasoning_model=self._is_reasoning_model
|
|
86
|
+
)
|
|
72
87
|
|
|
73
88
|
# __call__ not overridden here because child classes have such varied signatures that there's no point
|
|
74
89
|
# defining a default here.
|
|
@@ -132,7 +147,7 @@ class PromptyEvaluatorBase(EvaluatorBase[T]):
|
|
|
132
147
|
score = float(match.group())
|
|
133
148
|
binary_result = self._get_binary_result(score)
|
|
134
149
|
return {
|
|
135
|
-
self._result_key: float(score),
|
|
150
|
+
self._result_key: float(score),
|
|
136
151
|
f"gpt_{self._result_key}": float(score),
|
|
137
152
|
f"{self._result_key}_result": binary_result,
|
|
138
153
|
f"{self._result_key}_threshold": self._threshold,
|
|
@@ -140,7 +155,7 @@ class PromptyEvaluatorBase(EvaluatorBase[T]):
|
|
|
140
155
|
|
|
141
156
|
binary_result = self._get_binary_result(score)
|
|
142
157
|
return {
|
|
143
|
-
self._result_key: float(score),
|
|
158
|
+
self._result_key: float(score),
|
|
144
159
|
f"gpt_{self._result_key}": float(score),
|
|
145
160
|
f"{self._result_key}_result": binary_result,
|
|
146
161
|
f"{self._result_key}_threshold": self._threshold,
|
|
@@ -57,7 +57,12 @@ class RaiServiceEvaluatorBase(EvaluatorBase[T]):
|
|
|
57
57
|
threshold: int = 3,
|
|
58
58
|
_higher_is_better: Optional[bool] = False,
|
|
59
59
|
):
|
|
60
|
-
super().__init__(
|
|
60
|
+
super().__init__(
|
|
61
|
+
eval_last_turn=eval_last_turn,
|
|
62
|
+
conversation_aggregation_type=conversation_aggregation_type,
|
|
63
|
+
threshold=threshold,
|
|
64
|
+
_higher_is_better=_higher_is_better,
|
|
65
|
+
)
|
|
61
66
|
self._eval_metric = eval_metric
|
|
62
67
|
self._azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
63
68
|
self._credential = credential
|
|
@@ -22,9 +22,9 @@ class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
|
|
|
22
22
|
|
|
23
23
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
24
24
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
25
|
-
:param azure_ai_project: The
|
|
26
|
-
It contains subscription id, resource group, and project name.
|
|
27
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
25
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
26
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
27
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
28
28
|
:param violence_threshold: The threshold for violence evaluation. Default is 3.
|
|
29
29
|
:type violence_threshold: int
|
|
30
30
|
:param sexual_threshold: The threshold for sexual content evaluation. Default is 3.
|
|
@@ -45,19 +45,19 @@ class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
|
|
|
45
45
|
:language: python
|
|
46
46
|
:dedent: 8
|
|
47
47
|
:caption: Initialize and call ContentSafetyEvaluator using azure.ai.evaluation.AzureAIProject.
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
.. admonition:: Example using Azure AI Project URL:
|
|
50
|
-
|
|
50
|
+
|
|
51
51
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
52
52
|
:start-after: [START content_safety_evaluator]
|
|
53
53
|
:end-before: [END content_safety_evaluator]
|
|
54
54
|
:language: python
|
|
55
55
|
:dedent: 8
|
|
56
|
-
:caption: Initialize and call ContentSafetyEvaluator using Azure AI Project URL in the following format
|
|
56
|
+
:caption: Initialize and call ContentSafetyEvaluator using Azure AI Project URL in the following format
|
|
57
57
|
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
.. admonition:: Example with Threshold:
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
62
62
|
:start-after: [START threshold_content_safety_evaluator]
|
|
63
63
|
:end-before: [END threshold_content_safety_evaluator]
|
|
@@ -70,15 +70,15 @@ class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
|
|
|
70
70
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
71
71
|
|
|
72
72
|
def __init__(
|
|
73
|
-
self,
|
|
74
|
-
credential,
|
|
73
|
+
self,
|
|
74
|
+
credential,
|
|
75
75
|
azure_ai_project,
|
|
76
|
-
*,
|
|
76
|
+
*,
|
|
77
77
|
violence_threshold: int = 3,
|
|
78
78
|
sexual_threshold: int = 3,
|
|
79
79
|
self_harm_threshold: int = 3,
|
|
80
80
|
hate_unfairness_threshold: int = 3,
|
|
81
|
-
**kwargs
|
|
81
|
+
**kwargs,
|
|
82
82
|
):
|
|
83
83
|
# Type checking
|
|
84
84
|
for name, value in [
|
|
@@ -89,7 +89,7 @@ class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
|
|
|
89
89
|
]:
|
|
90
90
|
if not isinstance(value, int):
|
|
91
91
|
raise TypeError(f"{name} must be an int, got {type(value)}")
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
evaluators = [
|
|
94
94
|
ViolenceEvaluator(credential, azure_ai_project, threshold=violence_threshold),
|
|
95
95
|
SexualEvaluator(credential, azure_ai_project, threshold=sexual_threshold),
|
|
@@ -45,9 +45,9 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
45
45
|
|
|
46
46
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
47
47
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
48
|
-
:param azure_ai_project: The
|
|
49
|
-
It contains subscription id, resource group, and project name.
|
|
50
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
48
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
49
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
50
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
51
51
|
:param threshold: The threshold for the HateUnfairness evaluator. Default is 3.
|
|
52
52
|
:type threshold: int
|
|
53
53
|
|
|
@@ -59,17 +59,17 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
59
59
|
:language: python
|
|
60
60
|
:dedent: 8
|
|
61
61
|
:caption: Initialize and call HateUnfairnessEvaluator using azure.ai.evaluation.AzureAIProject.
|
|
62
|
-
|
|
62
|
+
|
|
63
63
|
.. admonition:: Example using Azure AI Project URL:
|
|
64
|
-
|
|
64
|
+
|
|
65
65
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
66
66
|
:start-after: [START hate_unfairness_evaluator]
|
|
67
67
|
:end-before: [END hate_unfairness_evaluator]
|
|
68
68
|
:language: python
|
|
69
69
|
:dedent: 8
|
|
70
|
-
:caption: Initialize and call HateUnfairnessEvaluator using Azure AI Project URL in the following format
|
|
70
|
+
:caption: Initialize and call HateUnfairnessEvaluator using Azure AI Project URL in the following format
|
|
71
71
|
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
72
|
-
|
|
72
|
+
|
|
73
73
|
.. admonition:: Example with Threshold:
|
|
74
74
|
|
|
75
75
|
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
@@ -39,9 +39,9 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
39
39
|
|
|
40
40
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
41
41
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
42
|
-
:param azure_ai_project: The
|
|
43
|
-
It contains subscription id, resource group, and project name.
|
|
44
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
42
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
43
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
44
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
45
45
|
:param threshold: The threshold for the SelfHarm evaluator. Default is 3.
|
|
46
46
|
:type threshold: int
|
|
47
47
|
|
|
@@ -53,16 +53,16 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
53
53
|
:language: python
|
|
54
54
|
:dedent: 8
|
|
55
55
|
:caption: Initialize and call SelfHarmEvaluator using azure.ai.evaluation.AzureAIProject.
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
.. admonition:: Example using Azure AI Project URL:
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
60
60
|
:start-after: [START self_harm_evaluator]
|
|
61
61
|
:end-before: [END self_harm_evaluator]
|
|
62
62
|
:language: python
|
|
63
63
|
:dedent: 8
|
|
64
|
-
:caption: Initialize and call SelfHarmEvaluator using Azure AI Project URL in the following format
|
|
65
|
-
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
64
|
+
:caption: Initialize and call SelfHarmEvaluator using Azure AI Project URL in the following format
|
|
65
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
|
|
66
66
|
"""
|
|
67
67
|
|
|
68
68
|
id = "azureml://registries/azureml/models/Self-Harm-Related-Content-Evaluator/versions/3"
|
|
@@ -41,9 +41,9 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
41
41
|
|
|
42
42
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
43
43
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
44
|
-
:param azure_ai_project: The
|
|
45
|
-
It contains subscription id, resource group, and project name.
|
|
46
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
44
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
45
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
46
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
47
47
|
:param threshold: The threshold for the Sexual evaluator. Default is 3.
|
|
48
48
|
:type threshold: int
|
|
49
49
|
|
|
@@ -55,17 +55,17 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
55
55
|
:language: python
|
|
56
56
|
:dedent: 8
|
|
57
57
|
:caption: Initialize and call a SexualEvaluator.
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
.. admonition:: Example using Azure AI Project URL:
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
62
62
|
:start-after: [START sexual_evaluator]
|
|
63
63
|
:end-before: [END sexual_evaluator]
|
|
64
64
|
:language: python
|
|
65
65
|
:dedent: 8
|
|
66
|
-
:caption: Initialize and call SexualEvaluator using Azure AI Project URL in following format
|
|
66
|
+
:caption: Initialize and call SexualEvaluator using Azure AI Project URL in following format
|
|
67
67
|
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
.. admonition:: Example with Threshold:
|
|
70
70
|
|
|
71
71
|
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
@@ -41,9 +41,9 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
41
41
|
|
|
42
42
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
43
43
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
44
|
-
:param azure_ai_project: The
|
|
45
|
-
It contains subscription id, resource group, and project name.
|
|
46
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
44
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
45
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
46
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
47
47
|
:param threshold: The threshold for the Violence evaluator. Default is 3.
|
|
48
48
|
:type threshold: int
|
|
49
49
|
|
|
@@ -57,15 +57,15 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
|
|
|
57
57
|
:caption: Initialize and call a ViolenceEvaluator.
|
|
58
58
|
|
|
59
59
|
.. admonition:: Example using Azure AI Project URL:
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
62
62
|
:start-after: [START violence_evaluator]
|
|
63
63
|
:end-before: [END violence_evaluator]
|
|
64
64
|
:language: python
|
|
65
65
|
:dedent: 8
|
|
66
|
-
:caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in following format
|
|
66
|
+
:caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in following format
|
|
67
67
|
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
.. admonition:: Example:
|
|
70
70
|
|
|
71
71
|
.. literalinclude:: ../samples/evaluation_samples_threshold.py
|
|
@@ -4,8 +4,4 @@
|
|
|
4
4
|
|
|
5
5
|
from ._document_retrieval import DocumentRetrievalEvaluator, RetrievalGroundTruthDocument, RetrievedDocument
|
|
6
6
|
|
|
7
|
-
__all__ = [
|
|
8
|
-
"DocumentRetrievalEvaluator",
|
|
9
|
-
"RetrievalGroundTruthDocument",
|
|
10
|
-
"RetrievedDocument"
|
|
11
|
-
]
|
|
7
|
+
__all__ = ["DocumentRetrievalEvaluator", "RetrievalGroundTruthDocument", "RetrievedDocument"]
|