azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (76)
  1. azure/ai/evaluation/__init__.py +4 -4
  2. azure/ai/evaluation/_common/rai_service.py +4 -4
  3. azure/ai/evaluation/_common/utils.py +40 -25
  4. azure/ai/evaluation/_constants.py +13 -0
  5. azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
  6. azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
  7. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
  8. azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
  9. azure/ai/evaluation/_evaluate/_evaluate.py +88 -63
  10. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
  11. azure/ai/evaluation/_evaluate/_utils.py +29 -22
  12. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  13. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +34 -86
  14. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
  15. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  16. azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
  17. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
  18. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
  19. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  20. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
  21. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
  22. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
  23. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
  24. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
  25. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
  26. azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
  27. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +29 -79
  29. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
  30. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  31. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +33 -85
  32. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
  33. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
  34. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
  35. azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
  36. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +34 -88
  37. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
  38. azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
  39. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +17 -29
  40. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
  41. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
  42. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -18
  43. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  44. azure/ai/evaluation/_evaluators/_xpia/xpia.py +16 -91
  45. azure/ai/evaluation/_exceptions.py +0 -1
  46. azure/ai/evaluation/_http_utils.py +3 -3
  47. azure/ai/evaluation/_model_configurations.py +36 -8
  48. azure/ai/evaluation/_version.py +1 -1
  49. azure/ai/evaluation/simulator/__init__.py +1 -1
  50. azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
  51. azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  52. azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
  53. azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
  54. azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
  55. azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
  56. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
  57. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
  58. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
  59. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
  60. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  61. azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
  62. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  63. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
  64. azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +166 -88
  65. azure/ai/evaluation/simulator/_tracing.py +21 -24
  66. azure/ai/evaluation/simulator/_utils.py +4 -1
  67. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/METADATA +144 -14
  68. azure_ai_evaluation-1.0.0b3.dist-info/RECORD +98 -0
  69. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -350
  70. azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
  71. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -66
  72. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  73. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  74. azure_ai_evaluation-1.0.0b1.dist-info/RECORD +0 -97
  75. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL +0 -0
  76. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt +0 -0
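
A few of the entries above are moves rather than edits: the retrieval evaluator leaves the `_chat` package (entries 38-40), `simulator.py` becomes the private module `_simulator.py` (entry 64), and the plural `_protected_materials` package is dropped in favor of `_protected_material` (entries 72-73; the 1.0.0b1 RECORD at the bottom shows both spellings shipped side by side). A rough sketch of what the moves imply for import paths follows; the class names on the 1.0.0b3 side are assumptions inferred from the renamed files, not confirmed by this diff, so verify them against the new `__init__.py` exports before relying on them.

    # 1.0.0b1 import paths, as recorded in the old RECORD file below:
    from azure.ai.evaluation._evaluators._chat.retrieval import RetrievalChatEvaluator
    from azure.ai.evaluation._evaluators._protected_materials import ProtectedMaterialsEvaluator

    # 1.0.0b3 equivalents implied by the renames; the exported class names here
    # are assumptions, not confirmed by this diff:
    from azure.ai.evaluation._evaluators._retrieval import RetrievalEvaluator
    from azure.ai.evaluation._evaluators._protected_material import ProtectedMaterialEvaluator

    # simulator.py -> _simulator.py only privatizes the module; the package-level
    # import should be unaffected (simulator/__init__.py changes by a single line):
    from azure.ai.evaluation.simulator import Simulator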
azure/ai/evaluation/_evaluators/_chat/_chat.py (deleted)
@@ -1,350 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
- import json
- import logging
- from concurrent.futures import as_completed
- from typing import Dict, List, Union
-
- import numpy as np
-
- from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
-
- from .._coherence import CoherenceEvaluator
- from .._fluency import FluencyEvaluator
- from .._groundedness import GroundednessEvaluator
- from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
- from .._relevance import RelevanceEvaluator
- from .retrieval import RetrievalChatEvaluator
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
-
- logger = logging.getLogger(__name__)
-
-
- class ChatEvaluator:
-     """
-     Initialize a chat evaluator configured for a specific Azure OpenAI model.
-
-     :param model_config: Configuration for the Azure OpenAI model.
-     :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
-         ~azure.ai.evaluation.OpenAIModelConfiguration]
-     :param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
-         focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
-     :type eval_last_turn: bool
-     :param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
-         Default is True.
-     :type parallel: bool
-     :return: A function that evaluates and generates metrics for "chat" scenario.
-     :rtype: Callable
-
-     **Usage**
-
-     .. code-block:: python
-
-         chat_eval = ChatEvaluator(model_config)
-         conversation = [
-             {"role": "user", "content": "What is the value of 2 + 2?"},
-             {"role": "assistant", "content": "2 + 2 = 4", "context": {
-                 "citations": [
-                     {"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
-                 ]
-             }
-             }
-         ]
-         result = chat_eval(conversation=conversation)
-
-     **Output format**
-
-     .. code-block:: python
-
-         {
-             "evaluation_per_turn": {
-                 "gpt_retrieval": [1.0, 2.0],
-                 "gpt_groundedness": [5.0, 2.0],
-                 "gpt_relevance": [3.0, 5.0],
-                 "gpt_coherence": [1.0, 2.0],
-                 "gpt_fluency": [3.0, 5.0]
-             }
-             "gpt_retrieval": 1.5,
-             "gpt_groundedness": 3.5,
-             "gpt_relevance": 4.0,
-             "gpt_coherence": 1.5,
-             "gpt_fluency": 4.0
-         }
-     """
-
-     def __init__(
-         self,
-         model_config: dict,
-         eval_last_turn: bool = False,
-         parallel: bool = True,
-     ):
-         self._eval_last_turn = eval_last_turn
-         self._parallel = parallel
-
-         # TODO: Need a built-in evaluator for retrieval. It needs to be added to `self._rag_evaluators` collection
-         self._rag_evaluators = [
-             GroundednessEvaluator(model_config),
-             RelevanceEvaluator(model_config),
-         ]
-         self._non_rag_evaluators = [
-             CoherenceEvaluator(model_config),
-             FluencyEvaluator(model_config),
-         ]
-         # TODO: Temporary workaround to close the gap of missing retrieval score
-         # https://msdata.visualstudio.com/Vienna/_workitems/edit/3186644
-         # For long term, we need to add a built-in evaluator for retrieval after prompt is generalized for QA and Chat
-         self._retrieval_chat_evaluator = RetrievalChatEvaluator(model_config)
-
-     def __call__(self, *, conversation, **kwargs):
-         """
-         Evaluates chat scenario.
-
-         :keyword conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
-             "context" key is optional for assistant's turn and should have "citations" key with list of citations.
-         :paramtype conversation: List[Dict]
-         :return: The scores for Chat scenario.
-         :rtype: dict
-         """
-         self._validate_conversation(conversation)
-
-         # Extract queries, responses and contexts from conversation
-         queries = []
-         responses = []
-         contexts = []
-
-         if self._eval_last_turn:
-             # Process only the last two turns if _eval_last_turn is True
-             conversation_slice = conversation[-2:] if len(conversation) >= 2 else conversation
-         else:
-             conversation_slice = conversation
-
-         for each_turn in conversation_slice:
-             role = each_turn["role"]
-             if role == "user":
-                 queries.append(each_turn["content"])
-             elif role == "assistant":
-                 responses.append(each_turn["content"])
-                 if "context" in each_turn and "citations" in each_turn["context"]:
-                     citations = json.dumps(each_turn["context"]["citations"])
-                     contexts.append(citations)
-
-         # Select evaluators to be used for evaluation
-         compute_rag_based_metrics = True
-         if len(responses) != len(contexts):
-             safe_message = (
-                 "Skipping rag based metrics as we need citations or "
-                 "retrieved_documents in context key of every assistant's turn"
-             )
-             logger.warning(safe_message)
-             compute_rag_based_metrics = False
-
-         selected_evaluators = []
-         selected_evaluators.extend(self._non_rag_evaluators)
-         if compute_rag_based_metrics:
-             selected_evaluators.extend(self._rag_evaluators)
-
-         # Evaluate each turn
-         per_turn_results = []
-         for turn_num in range(len(queries)):
-             current_turn_result = {}
-
-             if self._parallel:
-                 # Parallel execution
-                 with ThreadPoolExecutor() as executor:
-                     future_to_evaluator = {
-                         executor.submit(
-                             self._evaluate_turn, turn_num, queries, responses, contexts, evaluator
-                         ): evaluator
-                         for evaluator in selected_evaluators
-                     }
-
-                     for future in as_completed(future_to_evaluator):
-                         result = future.result()
-                         current_turn_result.update(result)
-             else:
-                 # Sequential execution
-                 for evaluator in selected_evaluators:
-                     async_evaluator = evaluator._to_async()
-                     result = self._evaluate_turn(turn_num, queries, responses, contexts, async_evaluator)
-                     current_turn_result.update(result)
-
-             per_turn_results.append(current_turn_result)
-
-         # Aggregate results
-         # Final aggregated results for a conversation will look like:
-         # "gpt_groundedness": 2.0, # Mean of all groundedness scores
-         # "evaluation_per_turn": {
-         #     "gpt_groundedness": {
-         #         "score": [1.0, ...],
-         #         "reason": ["reason1", ...],
-         #     },
-         # },
-         # }
-         aggregated = self._aggregate_results(per_turn_results)
-
-         # Run RetrievalChatEvaluator and merge the results
-         if compute_rag_based_metrics:
-             retrieval_score = self._retrieval_chat_evaluator(conversation=conversation_slice)
-             aggregated["gpt_retrieval"] = retrieval_score["gpt_retrieval"]
-             aggregated["evaluation_per_turn"]["gpt_retrieval"] = retrieval_score["evaluation_per_turn"]["gpt_retrieval"]
-             aggregated = dict(sorted(aggregated.items()))
-
-         return aggregated
-
-     def _evaluate_turn(self, turn_num, queries, responses, contexts, evaluator):
-         try:
-             query = queries[turn_num] if turn_num < len(queries) else ""
-             response = responses[turn_num] if turn_num < len(responses) else ""
-             context = contexts[turn_num] if turn_num < len(contexts) else ""
-
-             score = evaluator(query=query, response=response, context=context)
-
-             return score
-         except Exception as e:  # pylint: disable=broad-exception-caught
-             logger.warning(
-                 f"Evaluator {evaluator.__class__.__name__} failed for turn {turn_num + 1} with exception: {e}"
-             )
-             return {}
-
-     def _aggregate_results(self, per_turn_results: List[Dict]):
-         scores = {}
-         reasons = {}
-
-         for turn in per_turn_results:
-             for metric, value in turn.items():
-                 if "reason" in metric:
-                     if metric not in reasons:
-                         reasons[metric] = []
-                     reasons[metric].append(value)
-                 else:
-                     if metric not in scores:
-                         scores[metric] = []
-                     scores[metric].append(value)
-
-         aggregated = {}
-         evaluation_per_turn = {}
-
-         for metric, values in scores.items():
-             aggregated[metric] = np.nanmean(values)
-
-             # Prepare per-turn evaluations
-             evaluation_per_turn[metric] = {"score": values}
-             reason_key = f"{metric}_reason"
-             if reason_key in reasons:
-                 evaluation_per_turn[metric]["reason"] = reasons[reason_key]
-
-         aggregated["evaluation_per_turn"] = evaluation_per_turn
-
-         return aggregated
-
-     def _validate_conversation(self, conversation: List[Dict]):
-         if conversation is None or not isinstance(conversation, list):
-             msg = "conversation must be a list of dictionaries"
-             raise EvaluationException(
-                 message=msg,
-                 internal_message=msg,
-                 target=ErrorTarget.CHAT_EVALUATOR,
-                 category=ErrorCategory.INVALID_VALUE,
-                 blame=ErrorBlame.USER_ERROR,
-             )
-
-         expected_role = "user"
-         for turn_num, turn in enumerate(conversation):
-             one_based_turn_num = turn_num + 1
-
-             if not isinstance(turn, dict):
-                 msg = f"Each turn in 'conversation' must be a dictionary. Turn number: {one_based_turn_num}"
-                 raise EvaluationException(
-                     message=msg,
-                     internal_message=msg,
-                     target=ErrorTarget.CHAT_EVALUATOR,
-                     category=ErrorCategory.INVALID_VALUE,
-                     blame=ErrorBlame.USER_ERROR,
-                 )
-
-             if "role" not in turn or "content" not in turn:
-                 msg = f"Each turn in 'conversation' must have 'role' and 'content' keys. Turn number: {one_based_turn_num}"
-                 raise EvaluationException(
-                     message=msg,
-                     internal_message=msg,
-                     target=ErrorTarget.CHAT_EVALUATOR,
-                     category=ErrorCategory.INVALID_VALUE,
-                     blame=ErrorBlame.USER_ERROR,
-                 )
-
-             if turn["role"] != expected_role:
-                 msg = f"Expected role {expected_role} but got {turn['role']}. Turn number: {one_based_turn_num}"
-                 raise EvaluationException(
-                     message=msg,
-                     internal_message=msg,
-                     target=ErrorTarget.CHAT_EVALUATOR,
-                     category=ErrorCategory.INVALID_VALUE,
-                     blame=ErrorBlame.USER_ERROR,
-                 )
-
-             if not isinstance(turn["content"], str):
-                 msg = f"Content in each turn must be a string. Turn number: {one_based_turn_num}"
-                 raise EvaluationException(
-                     message=msg,
-                     internal_message=msg,
-                     target=ErrorTarget.CHAT_EVALUATOR,
-                     category=ErrorCategory.INVALID_VALUE,
-                     blame=ErrorBlame.USER_ERROR,
-                 )
-
-             if turn["role"] == "assistant" and "context" in turn:
-                 if not isinstance(turn["context"], dict):
-                     msg = f"Context in each assistant's turn must be a dictionary. Turn number: {one_based_turn_num}"
-                     raise EvaluationException(
-                         message=msg,
-                         internal_message=msg,
-                         target=ErrorTarget.CHAT_EVALUATOR,
-                         category=ErrorCategory.INVALID_VALUE,
-                         blame=ErrorBlame.USER_ERROR,
-                     )
-
-                 if "citations" not in turn["context"]:
-                     msg = f"Context in each assistant's turn must have 'citations' key. Turn number: {one_based_turn_num}"
-                     raise EvaluationException(
-                         message=msg,
-                         internal_message=msg,
-                         target=ErrorTarget.CHAT_EVALUATOR,
-                         category=ErrorCategory.MISSING_FIELD,
-                         blame=ErrorBlame.USER_ERROR,
-                     )
-
-                 if not isinstance(turn["context"]["citations"], list):
-                     msg = f"'citations' in context must be a list. Turn number: {one_based_turn_num}"
-                     raise EvaluationException(
-                         message=msg,
-                         internal_message=msg,
-                         target=ErrorTarget.CHAT_EVALUATOR,
-                         category=ErrorCategory.INVALID_VALUE,
-                         blame=ErrorBlame.USER_ERROR,
-                     )
-
-                 for citation_num, citation in enumerate(turn["context"]["citations"]):
-                     if not isinstance(citation, dict):
-                         msg = f"Each citation in 'citations' must be a dictionary. Turn number: {one_based_turn_num}, Citation number: {citation_num + 1}"
-                         raise EvaluationException(
-                             message=msg,
-                             internal_message=msg,
-                             target=ErrorTarget.CHAT_EVALUATOR,
-                             category=ErrorCategory.INVALID_VALUE,
-                             blame=ErrorBlame.USER_ERROR,
-                         )
-
-             # Toggle expected role for the next turn
-             expected_role = "user" if expected_role == "assistant" else "assistant"
-
-         # Ensure the conversation ends with an assistant's turn
-         if expected_role != "user":
-             msg = "The conversation must end with an assistant's turn."
-             raise EvaluationException(
-                 message=msg,
-                 internal_message=msg,
-                 target=ErrorTarget.CHAT_EVALUATOR,
-                 category=ErrorCategory.INVALID_VALUE,
-                 blame=ErrorBlame.USER_ERROR,
-             )
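
The removed ChatEvaluator above is the largest deletion in this release; its per-conversation aggregation is self-contained and easy to reproduce if you still need it. A minimal runnable sketch of the same logic, restated from `_aggregate_results` above (the `aggregate` name and toy input are illustrative, not part of the package):

    from typing import Dict, List

    import numpy as np

    def aggregate(per_turn_results: List[Dict]) -> Dict:
        # Split per-turn outputs into numeric scores and "<metric>_reason" strings,
        # exactly as the deleted _aggregate_results did.
        scores: Dict[str, list] = {}
        reasons: Dict[str, list] = {}
        for turn in per_turn_results:
            for metric, value in turn.items():
                bucket = reasons if "reason" in metric else scores
                bucket.setdefault(metric, []).append(value)

        # Mean per metric (NaN-safe), plus per-turn lists under "evaluation_per_turn".
        aggregated: Dict = {metric: float(np.nanmean(values)) for metric, values in scores.items()}
        evaluation_per_turn: Dict = {}
        for metric, values in scores.items():
            evaluation_per_turn[metric] = {"score": values}
            if f"{metric}_reason" in reasons:
                evaluation_per_turn[metric]["reason"] = reasons[f"{metric}_reason"]
        aggregated["evaluation_per_turn"] = evaluation_per_turn
        return aggregated

    # Two turns of one metric: the mean lands at 4.0 and the per-turn lists survive.
    print(aggregate([
        {"gpt_fluency": 3.0, "gpt_fluency_reason": "ok"},
        {"gpt_fluency": 5.0, "gpt_fluency_reason": "better"},
    ]))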
azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py (deleted)
@@ -1,9 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
-
- from ._retrieval import RetrievalChatEvaluator
-
- __all__ = [
-     "RetrievalChatEvaluator",
- ]
azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py (deleted)
@@ -1,66 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
-
- from abc import ABC
-
- from azure.ai.evaluation._common.constants import EvaluationMetrics
- from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
- from azure.ai.evaluation._model_configurations import AzureAIProject
-
-
- class ContentSafetyEvaluatorBase(ABC):
-     """
-     Initialize a evaluator for a specified Evaluation Metric. Base class that is not
-     meant to be instantiated by users.
-
-
-     :param metric: The metric to be evaluated.
-     :type metric: ~azure.ai.evaluation._evaluators._content_safety.flow.constants.EvaluationMetrics
-     :param azure_ai_project: The scope of the Azure AI project.
-         It contains subscription id, resource group, and project name.
-     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-     :param credential: The credential for connecting to Azure AI project.
-     :type credential: ~azure.core.credentials.TokenCredential
-     """
-
-     def __init__(self, metric: EvaluationMetrics, azure_ai_project: dict, credential=None):
-         self._metric = metric
-         self._azure_ai_project = azure_ai_project
-         self._credential = credential
-
-     async def __call__(self, *, query: str, response: str, **kwargs):
-         """
-         Evaluates content according to this evaluator's metric.
-
-         :keyword query: The query to be evaluated.
-         :paramtype query: str
-         :keyword response: The response to be evaluated.
-         :paramtype response: str
-         :return: The evaluation score computation based on the Content Safety metric (self.metric).
-         :rtype: Any
-         """
-         # Validate inputs
-         # Raises value error if failed, so execution alone signifies success.
-         if not (query and query.strip() and query != "None") or not (
-             response and response.strip() and response != "None"
-         ):
-             msg = "Both 'query' and 'response' must be non-empty strings."
-             raise EvaluationException(
-                 message=msg,
-                 internal_message=msg,
-                 error_category=ErrorCategory.MISSING_FIELD,
-                 error_blame=ErrorBlame.USER_ERROR,
-                 error_target=ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
-             )
-
-         # Run score computation based on supplied metric.
-         result = await evaluate_with_rai_service(
-             metric_name=self._metric,
-             query=query,
-             response=response,
-             project_scope=self._azure_ai_project,
-             credential=self._credential,
-         )
-         return result
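
The deleted base class above survives in spirit as the new common base classes (entries 15-18, notably `_base_rai_svc_eval.py`). Its usage pattern was simple: each category evaluator pinned one metric at construction and inherited the validation plus the RAI service call. A hypothetical sketch of that pattern, assuming `EvaluationMetrics.VIOLENCE` is one of the constants in `_common/constants.py` (the subclass below is illustrative; the real `_violence.py` in 1.0.0b1 added more plumbing):

    import asyncio

    from azure.ai.evaluation._common.constants import EvaluationMetrics
    # Path valid only in 1.0.0b1; the class is gone in 1.0.0b3.
    from azure.ai.evaluation._evaluators._content_safety._content_safety_base import ContentSafetyEvaluatorBase

    # Illustrative subclass: fix the metric at construction time and reuse the
    # inherited input validation + RAI service call.
    class _AsyncViolenceEvaluator(ContentSafetyEvaluatorBase):
        def __init__(self, azure_ai_project: dict, credential=None):
            super().__init__(EvaluationMetrics.VIOLENCE, azure_ai_project, credential)

    async def main() -> None:
        project = {"subscription_id": "<sub>", "resource_group_name": "<rg>", "project_name": "<proj>"}
        evaluator = _AsyncViolenceEvaluator(project)
        # Raises EvaluationException on empty inputs, otherwise calls the RAI service.
        print(await evaluator(query="What is 2 + 2?", response="4"))

    asyncio.run(main())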
azure/ai/evaluation/_evaluators/_protected_materials/__init__.py (deleted)
@@ -1,5 +0,0 @@
- from ._protected_materials import ProtectedMaterialsEvaluator
-
- __all__ = [
-     "ProtectedMaterialsEvaluator",
- ]
azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py (deleted)
@@ -1,104 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
- from promptflow._utils.async_utils import async_run_allowing_running_loop
- from azure.ai.evaluation._common.constants import EvaluationMetrics
- from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
- from azure.ai.evaluation._model_configurations import AzureAIProject
-
-
- class _AsyncProtectedMaterialsEvaluator:
-     def __init__(self, azure_ai_project: dict, credential=None):
-         self._azure_ai_project = azure_ai_project
-         self._credential = credential
-
-     async def __call__(self, *, query: str, response: str, **kwargs):
-         """
-         Evaluates content according to this evaluator's metric.
-
-         :keyword query: The query to be evaluated.
-         :paramtype query: str
-         :keyword response: The response to be evaluated.
-         :paramtype response: str
-         :return: The evaluation score computation based on the Content Safety metric (self.metric).
-         :rtype: Any
-         """
-         # Validate inputs
-         # Raises value error if failed, so execution alone signifies success.
-         if not (query and query.strip() and query != "None") or not (
-             response and response.strip() and response != "None"
-         ):
-             msg = "Both 'query' and 'response' must be non-empty strings."
-             raise EvaluationException(
-                 message=msg,
-                 internal_message=msg,
-                 error_category=ErrorCategory.MISSING_FIELD,
-                 error_blame=ErrorBlame.USER_ERROR,
-                 error_target=ErrorTarget.PROTECTED_MATERIAL_EVALUATOR,
-             )
-
-         # Run score computation based on supplied metric.
-         result = await evaluate_with_rai_service(
-             metric_name=EvaluationMetrics.PROTECTED_MATERIAL,
-             query=query,
-             response=response,
-             project_scope=self._azure_ai_project,
-             credential=self._credential,
-         )
-         return result
-
-
- class ProtectedMaterialsEvaluator:
-     """
-     Initialize a protected materials evaluator to detect whether protected material
-     is present in your AI system's response. Outputs True or False with AI-generated reasoning.
-
-     :param azure_ai_project: The scope of the Azure AI project.
-         It contains subscription id, resource group, and project name.
-     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-     :param credential: The credential for connecting to Azure AI project.
-     :type credential: ~azure.core.credentials.TokenCredential
-     :return: Whether or not protected material was found in the response, with AI-generated reasoning.
-     :rtype: Dict[str, str]
-
-     **Usage**
-
-     .. code-block:: python
-
-         azure_ai_project = {
-             "subscription_id": "<subscription_id>",
-             "resource_group_name": "<resource_group_name>",
-             "project_name": "<project_name>",
-         }
-         eval_fn = ProtectedMaterialsEvaluator(azure_ai_project)
-         result = eval_fn(query="What is the capital of France?", response="Paris.")
-
-     **Output format**
-
-     .. code-block:: python
-
-         {
-             "label": "False",
-             "reasoning": "This query does not contain any protected material."
-         }
-     """
-
-     def __init__(self, azure_ai_project: dict, credential=None):
-         self._async_evaluator = _AsyncProtectedMaterialsEvaluator(azure_ai_project, credential)
-
-     def __call__(self, *, query: str, response: str, **kwargs):
-         """
-         Evaluates protected materials content.
-
-         :keyword query: The query to be evaluated.
-         :paramtype query: str
-         :keyword response: The response to be evaluated.
-         :paramtype response: str
-         :return: A dictionary containing a boolean label and reasoning.
-         :rtype: dict
-         """
-         return async_run_allowing_running_loop(self._async_evaluator, query=query, response=response, **kwargs)
-
-     def _to_async(self):
-         return self._async_evaluator
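
Beyond the rename to the singular package, the deleted file is a clean example of the sync-facade-over-async-evaluator pattern that 1.0.0b3 centralizes in `_common/_base_eval.py` (entry 16). A stripped-down sketch of that pattern using the same promptflow helper the file imported, with the same call signature shown above (the `Greeter` classes are invented stand-ins, not package API):

    from promptflow._utils.async_utils import async_run_allowing_running_loop

    class _AsyncGreeter:
        # Stand-in for an async evaluator: any awaitable callable works.
        async def __call__(self, *, name: str) -> dict:
            return {"greeting": f"hello, {name}"}

    class Greeter:
        # Sync facade mirroring ProtectedMaterialsEvaluator.__call__ above.
        def __init__(self) -> None:
            self._async_evaluator = _AsyncGreeter()

        def __call__(self, *, name: str) -> dict:
            # Drives the coroutine whether or not an event loop is already running.
            return async_run_allowing_running_loop(self._async_evaluator, name=name)

        def _to_async(self):
            return self._async_evaluator

    print(Greeter()(name="world"))  # {'greeting': 'hello, world'}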
azure_ai_evaluation-1.0.0b1.dist-info/RECORD (deleted)
@@ -1,97 +0,0 @@
- azure/ai/evaluation/__init__.py,sha256=aCQXu_6B3tB-WSRtdPrMUODYMytwK1pVoOTwkR5EItg,1984
- azure/ai/evaluation/_constants.py,sha256=MtXK9FV3TgDiq8IYSMkSDbNXVQsZw62D26g7ZXJ62NU,1422
- azure/ai/evaluation/_exceptions.py,sha256=HUMfvguDc7ygcbs3MTK14R3PK7UxGNWQQHH3hYXIV3U,4168
- azure/ai/evaluation/_http_utils.py,sha256=13G64fWveezBdcmjwwwbJrIkdZp2ns3Jvd-addKw1SU,13983
- azure/ai/evaluation/_model_configurations.py,sha256=D02AzOdyO6LQCia0k232Msd7ro35-EcwmlQ0tOD_5H0,652
- azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
- azure/ai/evaluation/_version.py,sha256=oZ93SfxVsREL_UaqV9_yCzfIy_LG5t9AwK_uEE8HjXI,201
- azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- azure/ai/evaluation/_common/__init__.py,sha256=LHTkf6dMLLxikrGNgbUuREBVQcs4ORHR6Eryo4bm9M8,586
- azure/ai/evaluation/_common/constants.py,sha256=ZKHGVgGA1Fc6Pvo22X-CeOUX6-m0q_UwpOKOWATTSuI,1639
- azure/ai/evaluation/_common/rai_service.py,sha256=Cci_YnX1XeYHiZX4DTVj6_bAj2JADXh1znXS_cDM45M,17540
- azure/ai/evaluation/_common/utils.py,sha256=Ed2pgAXzswrOskWopYyRGdPMOkwbP-cgj32kBOuWMZw,2941
- azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
- azure/ai/evaluation/_evaluate/_eval_run.py,sha256=eUUUzF6o3FjkQ6L5aZnESQjWM_UscM5MnCD-5P0dldA,20237
- azure/ai/evaluation/_evaluate/_evaluate.py,sha256=HP7J39YUn9yMKLl2z-k-7_BVpzQIhDQwFOExQk6nb9w,28379
- azure/ai/evaluation/_evaluate/_utils.py,sha256=-8xv0TF3QQPfZlEpLOCGSq2Nvb-esZfpaDySOHLeey4,9680
- azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py,sha256=BkxhojWca3e2QM3hFwO2xrLiiQ0i-3f8wsMfOx1zchs,361
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py,sha256=wL_gNM2u8_DYUKopxFnJFc0cUAxWo9HV5gNSfZoluJo,2983
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py,sha256=xmck4Wlp0wrzkN6sZsmW-7MMAc1rIw16xwSV52_V8to,7120
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py,sha256=_2NynIyH_QFZQf5TDZ9x_FtDUXrzvMfGVuTtpZJazpo,1981
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=gF0mHhCYjrlOBgR4I6mQ5KIfExpq7W2On31ITmjYN8M,6400
- azure/ai/evaluation/_evaluators/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
- azure/ai/evaluation/_evaluators/_bleu/__init__.py,sha256=quKKO0kvOSkky5hcoNBvgBuMeeVRFCE9GSv70mAdGP4,260
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=q8hTVC8Ubfb0lQmlGnsuEb6J7Qx5c7eYso4ss6RhgOw,2413
- azure/ai/evaluation/_evaluators/_chat/__init__.py,sha256=xOsSHYNGJJiZvBMPbmLd_-ZZs8_15Sblvk-OF7iVoIo,250
- azure/ai/evaluation/_evaluators/_chat/_chat.py,sha256=GsK7EERUNo5uuh025EazxpK6wuN7mzz891_l8WFxqXg,14692
- azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py,sha256=DmBjBkwDDlCsSGpBeXfpfMM9ekxIJs62dij4rBXND7k,273
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py,sha256=X_2UzFd-FP032nT4fjH1EY67bvIcRpmmjv9KQrXpfUE,5739
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty,sha256=NoHNDf_UE8BGAixqZPhRQ4ecxOUi9w9gO8HfHhJuxaY,1761
- azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0UsRNWMIXiea2lLtPPXNGvkJzQ0,258
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=sOP2rYwGe1k_80FhBr5PGG8metmpJwGfx4U7-aVroIs,4121
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=WVEXxKmh_Gbb11_00N2WCIIJSMgPssFxJ5h2--rMG-w,2725
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py,sha256=mR5CbcMyxV9GQoY71Saoi0bQTpEB74HrYmM8gcVhnAg,746
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py,sha256=VUazS5AcJ5Z-BOrqfCRt5SdRhmTlxWdxl03FML0zHq4,3998
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py,sha256=Yb2x-sI5_GivAe8o8BvA2gM64yQCv73v5LS_QyiR9vQ,2802
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py,sha256=7cLTnbz0iHderZeXPKYbbBSwvKYZtMiExEMYQSfggd4,11909
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py,sha256=Ur1gQPBdr0b_Ov8K0Jflmvua2mgSrk9DhlPOAQR-7w4,2875
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py,sha256=MrCOPNmv6PAkVFKDrwNfXeOyhTmyDa2Tik_xEFKxYwg,2676
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py,sha256=MyRI9QtqElb2gDGPGnyQYfAHsjdw_oK6Ekjz4Voo9rg,2644
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py,sha256=YH0NxossokGX5tFKAIZJvMiprs34KuiNQnMXAbOOMCM,2668
- azure/ai/evaluation/_evaluators/_eci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- azure/ai/evaluation/_evaluators/_eci/_eci.py,sha256=vm8G-YvZuXipvJSixA6mktEL7xQvqnsHcD1trqh7Cxw,4058
- azure/ai/evaluation/_evaluators/_f1_score/__init__.py,sha256=aEVbO7iMoF20obdpLQKcKm69Yyu3mYnblKELLqu8OGI,260
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py,sha256=KNMni1m8dtaC6kLf9JRUVfJ1hZfYBi-g6iZiruJ_Hyc,4553
- azure/ai/evaluation/_evaluators/_fluency/__init__.py,sha256=EEJw39xRa0bOAA1rELTTKXQu2s60n_7CZQRD0Gu2QVw,259
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py,sha256=0KSSYQjWzPpGLBK-NIkIdfp0fPcCOpjIkuKMbYprqI0,4082
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty,sha256=RparSdDZs-xiGbq7lRifz9z7jaD10ldXDU3E7sO0v2s,2579
- azure/ai/evaluation/_evaluators/_gleu/__init__.py,sha256=Ae2EvQ7gqiYAoNO3LwGIhdAAjJPJDfT85rQGKrRrmbA,260
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=qQR5Ic4K4biHrMm8XLe1DtKc3MSwbfCjK_-CLpL9aCc,2326
- azure/ai/evaluation/_evaluators/_groundedness/__init__.py,sha256=UYNJUeRvBwcSVFyZpdsf29un5eyaDzYoo3QvC1gvlLg,274
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=BZdr2Gqgos1CDTnMPJPUfoa_Pkq9WxgdL-Qem6GbINs,4301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty,sha256=dDclp_YowBjeiGhwmCxCnS4A3K9r4v2tzsUm-ccLt-I,3199
- azure/ai/evaluation/_evaluators/_meteor/__init__.py,sha256=209na3pPsdmcuYpYHUYtqQybCpc3yZkc93HnRdicSlI,266
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=62AD1DVH_T_t9MDK4GEcvujC0Aka-SEggxEAmc68byg,3315
- azure/ai/evaluation/_evaluators/_protected_material/__init__.py,sha256=eRAQIU9diVXfO5bp6aLWxZoYUvOsrDIfy1gnDOeNTiI,109
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py,sha256=6eZXjp2iXSaTNCx_8_8A96lhH5cdm3rmeJfvzMJHV9k,4233
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py,sha256=A12UsRVIebGvy9FtZLBPsOIAWUskBt8iuhRdILyRcSo,112
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py,sha256=pRB48BwbnCuozVo7hpblh_ZnApWzmrqiTx0JqhH4ioY,4204
- azure/ai/evaluation/_evaluators/_qa/__init__.py,sha256=bcXfT--C0hjym2haqd1B2-u9bDciyM0ThOFtU1Q69sk,244
- azure/ai/evaluation/_evaluators/_qa/_qa.py,sha256=QSpk4wKIBdwVaI0-u4iRVlnezgxhPnSEAWRUxSwOJM8,3792
- azure/ai/evaluation/_evaluators/_relevance/__init__.py,sha256=JlxytW32Nl8pbE-fI3GRpfgVuY9EG6zxIAn5VZGSwyc,265
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py,sha256=YFLxC8VQ9kvEGK6jge7_yQCxXM1yPChy9d-dANVJA60,4507
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty,sha256=AO70ho2nMhBtKcl_q4nKFW4kA1LjYsmSfymNa-Cbcrw,3735
- azure/ai/evaluation/_evaluators/_rouge/__init__.py,sha256=kusCDaYcXogDugGefRP8MQSn9xv107oDbrMCqZ6K4GA,291
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py,sha256=IbD10QZMYzaXKqr6q4Diy7p06yhJ3kT8pMv2jnyMNtQ,3444
- azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTkRHG5NpN5s9XoiTSN4I8POWEkLA,268
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=z_mUu0lL_gZI041PZvIBgSZ9Aj7j8wUgyy5OPFlBJ_w,4495
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=p2Tb4IW6QnP2BaGRQsAicW4V0B23Oezhf5l3Hau0nxU,4770
- azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
- azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=-A9EpdZXr5wZzo_-cF_OX8mMAjy8AgKjNXt1D6Up0NU,5819
- azure/ai/evaluation/simulator/__init__.py,sha256=4O7O6esevfOX0Vb7Eo6nl_m9Qx_WDytwfSgZ7-E1HkE,485
- azure/ai/evaluation/simulator/_adversarial_scenario.py,sha256=SxpyMw5wmM5-fiUjl1_oJH0GQEnsa7ASso10MAr2Hjw,1030
- azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=toUDdMOD4JrsTOh9tsFTLTBz87o6YLgoY6djXnbO7Fk,20577
- azure/ai/evaluation/simulator/_constants.py,sha256=xM-Or2x7RytfoeBM3N7Vt4JQDJX66UdL3CPz0YN5rvE,485
- azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=ACd4HZBtY18stKBTGW7iHEq0hn42gNK453V_pioOcxA,11702
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=4HX_034m2j8CBNBVSap9CfwKJX00l0fbvuXJyuZYavU,9639
- azure/ai/evaluation/simulator/_tracing.py,sha256=svCnjlI09AQ4aNH8yeXZiE6RKg9Pi4q66oQ8LstKJBk,2968
- azure/ai/evaluation/simulator/_utils.py,sha256=v-_nzpNj8RYnSyBeXcdxqu-jj1Xj_UgYdc21Gty8uwY,4219
- azure/ai/evaluation/simulator/simulator.py,sha256=902a7P5DGrkN1blbCTzHGDEFhA60LlxdjVEJVVa4cSQ,28464
- azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=Qxu2x1cJ2fPTQtYhhNP5C4-d8P33Sru4Zn6xAgroDso,12774
- azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=9a2plS1QzyuvWbJYj6rY128401e6d_n_yiBNekIDkww,7251
- azure/ai/evaluation/simulator/_conversation/constants.py,sha256=3v7zkjPwJAPbSpJYIK6VOZZy70bJXMo_QTVqSFGlq9A,984
- azure/ai/evaluation/simulator/_helpers/__init__.py,sha256=CD2fGrUgEE3puIm5QYqbkCN1AtntpF-3hRbF98hkhqE,203
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py,sha256=7BBLH78b7YDelHDLbAIwf-IO9s9cAEtn-RRXmNReHdc,1017
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py,sha256=NjsmHy-bb0yeqKVz9j7f7KIlTEB7MeZe1NzFOLBdBm4,2531
- azure/ai/evaluation/simulator/_model_tools/__init__.py,sha256=aMv5apb7uVjuhMF9ohhA5kQmo652hrGIJlhdl3y2R1I,835
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py,sha256=qhYmG2r4IAKDePtf9DdqgvNGYlc0xjH4x5sShGxS-lA,5070
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py,sha256=fHE6OUc1drkABwGQ5L0ctrqARPjv5uwO-u7yOJhdCuQ,8397
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py,sha256=ITPiciPQQkZa5s2E8pa-x3lKrf8rp7sbr_WSPpECOnc,6725
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py,sha256=hpxBYui1eaw9_lDJJMnKj8CSWFDXiTa7XAKXA4lS0J4,5482
- azure/ai/evaluation/simulator/_model_tools/models.py,sha256=5pUESJR5b5rzp9W7B3Aaokz_HLHTSLjR13YKumLAxqs,21813
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=lNSioz2XiGQJb-AXRNYm2JCLMivZKa3JlHfol2Jd7fY,2244
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=00zLVfNgHZdlbC2XvBedSrwDJOaAhl3B1ohE3LKsGg4,928
- azure_ai_evaluation-1.0.0b1.dist-info/METADATA,sha256=1XGkdZ7U-s9NVdHGktn_nk5S8BBUUQA-jrf-_VUpdVU,12268
- azure_ai_evaluation-1.0.0b1.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
- azure_ai_evaluation-1.0.0b1.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
- azure_ai_evaluation-1.0.0b1.dist-info/RECORD,,