azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +51 -6
- azure/ai/evaluation/_aoai/__init__.py +1 -1
- azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
- azure/ai/evaluation/_aoai/label_grader.py +3 -2
- azure/ai/evaluation/_aoai/python_grader.py +84 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +91 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
- azure/ai/evaluation/_azure/_envs.py +9 -10
- azure/ai/evaluation/_azure/_token_manager.py +7 -1
- azure/ai/evaluation/_common/constants.py +11 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
- azure/ai/evaluation/_common/onedp/__init__.py +32 -32
- azure/ai/evaluation/_common/onedp/_client.py +136 -139
- azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
- azure/ai/evaluation/_common/onedp/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -50
- azure/ai/evaluation/_common/onedp/_version.py +9 -9
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
- azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
- azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
- azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/rai_service.py +88 -52
- azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
- azure/ai/evaluation/_common/utils.py +188 -10
- azure/ai/evaluation/_constants.py +2 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +9 -8
- azure/ai/evaluation/_converters/_models.py +46 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +2 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +73 -25
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +210 -94
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +132 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
- azure/ai/evaluation/_evaluate/_utils.py +25 -17
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +4 -4
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +20 -12
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +6 -6
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +45 -11
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +28 -18
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +11 -8
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +11 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +12 -9
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -7
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +37 -64
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +5 -5
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +4 -4
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +12 -8
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +31 -26
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -4
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +14 -7
- azure/ai/evaluation/_evaluators/_qa/_qa.py +5 -5
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +62 -15
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +21 -26
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +5 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +22 -22
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +7 -6
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +4 -4
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +27 -24
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +175 -183
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +99 -21
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +20 -12
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +10 -7
- azure/ai/evaluation/_exceptions.py +10 -0
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +117 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +33 -41
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +195 -111
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +3 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +1 -1
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
- azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
- azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
- azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
- azure/ai/evaluation/red_team/_default_converter.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +1947 -1040
- azure/ai/evaluation/red_team/_red_team_result.py +49 -38
- azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +39 -34
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
- azure/ai/evaluation/red_team/_utils/constants.py +1 -13
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
- azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +31 -4
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +31 -17
- azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +18 -6
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +30 -10
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +21 -8
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/METADATA +46 -3
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/RECORD +141 -136
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
-
from typing import Dict, List, Optional, TypedDict
|
|
4
|
+
from typing import Dict, List, Optional, TypedDict
|
|
5
5
|
import json
|
|
6
6
|
from azure.ai.evaluation._common._experimental import experimental
|
|
7
7
|
|
|
@@ -9,7 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
9
9
|
@experimental
|
|
10
10
|
class RiskCategorySummary(TypedDict):
|
|
11
11
|
"""Summary of attack success rates across risk categories.
|
|
12
|
-
|
|
12
|
+
|
|
13
13
|
:param overall_asr: Overall attack success rate across all risk categories
|
|
14
14
|
:type overall_asr: float
|
|
15
15
|
:param overall_total: Total number of attacks across all risk categories
|
|
@@ -41,6 +41,7 @@ class RiskCategorySummary(TypedDict):
|
|
|
41
41
|
:param self_harm_successful_attacks: Number of successful attacks for self-harm risk category
|
|
42
42
|
:type self_harm_successful_attacks: int
|
|
43
43
|
"""
|
|
44
|
+
|
|
44
45
|
overall_asr: float
|
|
45
46
|
overall_total: int
|
|
46
47
|
overall_successful_attacks: int
|
|
@@ -61,7 +62,7 @@ class RiskCategorySummary(TypedDict):
|
|
|
61
62
|
@experimental
|
|
62
63
|
class AttackTechniqueSummary(TypedDict):
|
|
63
64
|
"""Summary of attack success rates across complexity levels.
|
|
64
|
-
|
|
65
|
+
|
|
65
66
|
:param overall_asr: Overall attack success rate across all complexity levels
|
|
66
67
|
:type overall_asr: float
|
|
67
68
|
:param overall_total: Total number of attacks across all complexity levels
|
|
@@ -93,6 +94,7 @@ class AttackTechniqueSummary(TypedDict):
|
|
|
93
94
|
:param difficult_complexity_successful_attacks: Number of successful attacks for difficult complexity level
|
|
94
95
|
:type difficult_complexity_successful_attacks: int
|
|
95
96
|
"""
|
|
97
|
+
|
|
96
98
|
overall_asr: float
|
|
97
99
|
overall_total: int
|
|
98
100
|
overall_successful_attacks: int
|
|
@@ -113,7 +115,7 @@ class AttackTechniqueSummary(TypedDict):
|
|
|
113
115
|
@experimental
|
|
114
116
|
class JointRiskAttackSummaryItem(TypedDict):
|
|
115
117
|
"""Summary of attack success rates for a specific risk category across complexity levels.
|
|
116
|
-
|
|
118
|
+
|
|
117
119
|
:param risk_category: The risk category being summarized
|
|
118
120
|
:type risk_category: str
|
|
119
121
|
:param baseline_asr: Attack success rate for baseline complexity level
|
|
@@ -125,6 +127,7 @@ class JointRiskAttackSummaryItem(TypedDict):
|
|
|
125
127
|
:param difficult_complexity_asr: Attack success rate for difficult complexity level
|
|
126
128
|
:type difficult_complexity_asr: float
|
|
127
129
|
"""
|
|
130
|
+
|
|
128
131
|
risk_category: str
|
|
129
132
|
baseline_asr: float
|
|
130
133
|
easy_complexity_asr: float
|
|
@@ -135,7 +138,7 @@ class JointRiskAttackSummaryItem(TypedDict):
|
|
|
135
138
|
@experimental
|
|
136
139
|
class RedTeamingScorecard(TypedDict):
|
|
137
140
|
"""TypedDict representation of a Red Team Agent scorecard with the updated structure.
|
|
138
|
-
|
|
141
|
+
|
|
139
142
|
:param risk_category_summary: Overall metrics by risk category
|
|
140
143
|
:type risk_category_summary: List[RiskCategorySummary]
|
|
141
144
|
:param attack_technique_summary: Overall metrics by attack technique complexity
|
|
@@ -145,6 +148,7 @@ class RedTeamingScorecard(TypedDict):
|
|
|
145
148
|
:param detailed_joint_risk_attack_asr: Detailed ASR information broken down by complexity level, risk category, and converter
|
|
146
149
|
:type detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
|
|
147
150
|
"""
|
|
151
|
+
|
|
148
152
|
risk_category_summary: List[RiskCategorySummary]
|
|
149
153
|
attack_technique_summary: List[AttackTechniqueSummary]
|
|
150
154
|
joint_risk_attack_summary: List[JointRiskAttackSummaryItem]
|
|
@@ -154,7 +158,7 @@ class RedTeamingScorecard(TypedDict):
|
|
|
154
158
|
@experimental
|
|
155
159
|
class AttackObjectiveSource(TypedDict):
|
|
156
160
|
"""Information about how attack objectives were generated.
|
|
157
|
-
|
|
161
|
+
|
|
158
162
|
:param application_scenario: The application scenario used for generating attack objectives
|
|
159
163
|
:type application_scenario: str
|
|
160
164
|
:param risk_categories: List of risk categories targeted by the attack objectives
|
|
@@ -164,6 +168,7 @@ class AttackObjectiveSource(TypedDict):
|
|
|
164
168
|
:param policy_document: Policy document used for generating attack objectives
|
|
165
169
|
:type policy_document: str
|
|
166
170
|
"""
|
|
171
|
+
|
|
167
172
|
application_scenario: str
|
|
168
173
|
risk_categories: List[str]
|
|
169
174
|
custom_attack_seed_prompts: str
|
|
@@ -173,23 +178,27 @@ class AttackObjectiveSource(TypedDict):
|
|
|
173
178
|
@experimental
|
|
174
179
|
class RedTeamingParameters(TypedDict):
|
|
175
180
|
"""TypedDict representation of parameters used in a red team evaluation with the updated structure.
|
|
176
|
-
|
|
181
|
+
|
|
177
182
|
:param attack_objective_generated_from: Information about the source of attack objectives
|
|
178
183
|
:type attack_objective_generated_from: AttackObjectiveSource
|
|
179
184
|
:param attack_complexity: List of complexity levels used in the attacks
|
|
180
185
|
:type attack_complexity: List[str]
|
|
181
186
|
:param techniques_used: Dictionary mapping complexity levels to lists of converter techniques used
|
|
182
187
|
:type techniques_used: Dict[str, List[str]]
|
|
188
|
+
:param attack_success_thresholds: Thresholds used for determining attack success per risk category
|
|
189
|
+
:type attack_success_thresholds: Dict[str, Any]
|
|
183
190
|
"""
|
|
191
|
+
|
|
184
192
|
attack_objective_generated_from: AttackObjectiveSource
|
|
185
193
|
attack_complexity: List[str]
|
|
186
194
|
techniques_used: Dict[str, List[str]]
|
|
195
|
+
attack_success_thresholds: Dict[str, Dict[str, int]]
|
|
187
196
|
|
|
188
197
|
|
|
189
198
|
@experimental
|
|
190
199
|
class RiskAssessment(TypedDict):
|
|
191
200
|
"""Assessment of content safety risks in a conversation.
|
|
192
|
-
|
|
201
|
+
|
|
193
202
|
:param violence: Assessment of violence-related content
|
|
194
203
|
:type violence: Dict[str, str]
|
|
195
204
|
:param sexual: Assessment of sexual content
|
|
@@ -199,6 +208,7 @@ class RiskAssessment(TypedDict):
|
|
|
199
208
|
:param hate_unfair: Assessment of hate or unfairness content
|
|
200
209
|
:type hate_unfair: Dict[str, str]
|
|
201
210
|
"""
|
|
211
|
+
|
|
202
212
|
violence: Dict[str, str]
|
|
203
213
|
sexual: Dict[str, str]
|
|
204
214
|
self_harm: Dict[str, str]
|
|
@@ -208,7 +218,7 @@ class RiskAssessment(TypedDict):
|
|
|
208
218
|
@experimental
|
|
209
219
|
class AttackDetails(TypedDict):
|
|
210
220
|
"""TypedDict representation of a simulated conversation in a red team evaluation.
|
|
211
|
-
|
|
221
|
+
|
|
212
222
|
:param attack_success: Whether the attack was successful
|
|
213
223
|
:type attack_success: Optional[bool]
|
|
214
224
|
:param attack_technique: The attack technique used in the conversation
|
|
@@ -221,13 +231,17 @@ class AttackDetails(TypedDict):
|
|
|
221
231
|
:type conversation: List[Dict[str, str]]
|
|
222
232
|
:param risk_assessment: Dictionary containing content safety assessment for the conversation
|
|
223
233
|
:type risk_assessment: Optional[RiskAssessment]
|
|
234
|
+
:param attack_success_threshold: The threshold value used to determine attack success
|
|
235
|
+
:type attack_success_threshold: Optional[int]
|
|
224
236
|
"""
|
|
237
|
+
|
|
225
238
|
attack_success: Optional[bool]
|
|
226
239
|
attack_technique: str
|
|
227
240
|
attack_complexity: str
|
|
228
241
|
risk_category: str
|
|
229
242
|
conversation: List[Dict[str, str]]
|
|
230
243
|
risk_assessment: Optional[RiskAssessment]
|
|
244
|
+
attack_success_threshold: Optional[int]
|
|
231
245
|
|
|
232
246
|
|
|
233
247
|
@experimental
|
|
@@ -243,6 +257,7 @@ class ScanResult(TypedDict):
|
|
|
243
257
|
:param studio_url: Optional URL for the studio
|
|
244
258
|
:type studio_url: Optional[str]
|
|
245
259
|
"""
|
|
260
|
+
|
|
246
261
|
scorecard: RedTeamingScorecard
|
|
247
262
|
parameters: RedTeamingParameters
|
|
248
263
|
attack_details: List[AttackDetails]
|
|
@@ -250,12 +265,8 @@ class ScanResult(TypedDict):
|
|
|
250
265
|
|
|
251
266
|
|
|
252
267
|
@experimental
|
|
253
|
-
class RedTeamResult():
|
|
254
|
-
def __init__(
|
|
255
|
-
self,
|
|
256
|
-
scan_result: Optional[ScanResult] = None,
|
|
257
|
-
attack_details: Optional[List[AttackDetails]] = None
|
|
258
|
-
):
|
|
268
|
+
class RedTeamResult:
|
|
269
|
+
def __init__(self, scan_result: Optional[ScanResult] = None, attack_details: Optional[List[AttackDetails]] = None):
|
|
259
270
|
self.scan_result = scan_result
|
|
260
271
|
self.attack_details = attack_details
|
|
261
272
|
|
|
@@ -271,32 +282,32 @@ class RedTeamResult():
|
|
|
271
282
|
def to_scorecard(self) -> Optional[RedTeamingScorecard]:
|
|
272
283
|
"""Extracts the scorecard from a RedTeamResult object."""
|
|
273
284
|
return self.scan_result.get("scorecard", None) if self.scan_result else None
|
|
274
|
-
|
|
285
|
+
|
|
275
286
|
def to_eval_qr_json_lines(self) -> str:
|
|
276
287
|
"""
|
|
277
288
|
Converts conversations in messages format to query-response format suitable for evaluation.
|
|
278
|
-
|
|
289
|
+
|
|
279
290
|
The output format follows the JSONL pattern with each line containing:
|
|
280
291
|
{
|
|
281
292
|
"query": "user message content",
|
|
282
293
|
"response": "assistant message content",
|
|
283
294
|
"risk_category": "risk category",
|
|
284
295
|
"attack_strategy": "strategy name",
|
|
285
|
-
"attack_complexity": "complexity level",
|
|
296
|
+
"attack_complexity": "complexity level",
|
|
286
297
|
"attack_success": "true|false", (if available from evaluation)
|
|
287
298
|
"category": "risk category", (if available from evaluation)
|
|
288
299
|
"severity_level": "low|medium|high", (if available from evaluation)
|
|
289
300
|
"threshold": "threshold value" (if available from evaluation)
|
|
290
301
|
}
|
|
291
|
-
|
|
302
|
+
|
|
292
303
|
:returns: A list of strings containing query-response pairs in JSONL format.
|
|
293
304
|
:rtype: List[str]
|
|
294
305
|
"""
|
|
295
306
|
if not self.attack_details:
|
|
296
307
|
return ""
|
|
297
|
-
|
|
308
|
+
|
|
298
309
|
result_lines = []
|
|
299
|
-
|
|
310
|
+
|
|
300
311
|
for conversation in self.attack_details:
|
|
301
312
|
messages = conversation.get("conversation", [])
|
|
302
313
|
attack_technique = conversation.get("attack_technique", "")
|
|
@@ -304,12 +315,12 @@ class RedTeamResult():
|
|
|
304
315
|
risk_category = conversation.get("risk_category", "")
|
|
305
316
|
attack_success = conversation.get("attack_success")
|
|
306
317
|
risk_assessment = conversation.get("risk_assessment", {})
|
|
307
|
-
|
|
318
|
+
|
|
308
319
|
for i in range(0, len(messages) - 1, 2):
|
|
309
320
|
if i + 1 < len(messages):
|
|
310
321
|
user_msg = messages[i]
|
|
311
322
|
assistant_msg = messages[i + 1]
|
|
312
|
-
|
|
323
|
+
|
|
313
324
|
if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
|
|
314
325
|
# Create the base query-response pair
|
|
315
326
|
qr_pair = {
|
|
@@ -317,24 +328,24 @@ class RedTeamResult():
|
|
|
317
328
|
"response": assistant_msg.get("content", ""),
|
|
318
329
|
"risk_category": risk_category,
|
|
319
330
|
"attack_strategy": attack_technique,
|
|
320
|
-
"attack_complexity": attack_complexity
|
|
331
|
+
"attack_complexity": attack_complexity,
|
|
321
332
|
}
|
|
322
|
-
|
|
333
|
+
|
|
323
334
|
# Add evaluation data if available
|
|
324
335
|
if attack_success is not None:
|
|
325
336
|
qr_pair["attack_success"] = str(attack_success).lower()
|
|
326
|
-
|
|
337
|
+
|
|
327
338
|
# Add risk assessment data if available
|
|
328
339
|
for category, assessment in risk_assessment.items() if risk_assessment else {}:
|
|
329
340
|
if assessment and assessment.get("severity_label", None):
|
|
330
341
|
qr_pair["category"] = category
|
|
331
342
|
qr_pair["severity_level"] = assessment.get("severity_label", "").lower()
|
|
332
343
|
# Add threshold if available in the future
|
|
333
|
-
|
|
344
|
+
|
|
334
345
|
result_lines.append(json.dumps(qr_pair))
|
|
335
|
-
|
|
346
|
+
|
|
336
347
|
return result_lines
|
|
337
|
-
|
|
348
|
+
|
|
338
349
|
def attack_simulation(self) -> str:
|
|
339
350
|
"""
|
|
340
351
|
Returns the attack simulation data in a human-readable format.
|
|
@@ -343,9 +354,9 @@ class RedTeamResult():
|
|
|
343
354
|
"""
|
|
344
355
|
if not self.attack_details:
|
|
345
356
|
return ""
|
|
346
|
-
|
|
357
|
+
|
|
347
358
|
result_lines = []
|
|
348
|
-
|
|
359
|
+
|
|
349
360
|
for conversation in self.attack_details:
|
|
350
361
|
messages = conversation.get("conversation", [])
|
|
351
362
|
attack_technique = conversation.get("attack_technique", "")
|
|
@@ -353,30 +364,30 @@ class RedTeamResult():
|
|
|
353
364
|
risk_category = conversation.get("risk_category", "")
|
|
354
365
|
attack_success = conversation.get("attack_success")
|
|
355
366
|
risk_assessment = conversation.get("risk_assessment", {})
|
|
356
|
-
|
|
367
|
+
|
|
357
368
|
result_lines.append(f"Attack Technique: {attack_technique}")
|
|
358
369
|
result_lines.append(f"Attack Complexity: {attack_complexity}")
|
|
359
370
|
result_lines.append(f"Risk Category: {risk_category}")
|
|
360
371
|
result_lines.append("")
|
|
361
|
-
|
|
372
|
+
|
|
362
373
|
for i in range(0, len(messages) - 1, 2):
|
|
363
374
|
if i + 1 < len(messages):
|
|
364
375
|
user_msg = messages[i]
|
|
365
376
|
assistant_msg = messages[i + 1]
|
|
366
|
-
|
|
377
|
+
|
|
367
378
|
if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
|
|
368
379
|
result_lines.append(f"User: {user_msg.get('content', '')}")
|
|
369
380
|
result_lines.append(f"Assistant: {assistant_msg.get('content', '')}")
|
|
370
381
|
result_lines.append("")
|
|
371
|
-
|
|
382
|
+
|
|
372
383
|
if attack_success is not None:
|
|
373
384
|
result_lines.append(f"Attack Success: {'Successful' if attack_success else 'Failed'}")
|
|
374
385
|
result_lines.append("")
|
|
375
|
-
|
|
386
|
+
|
|
376
387
|
for category, assessment in risk_assessment.items() if risk_assessment else {}:
|
|
377
388
|
if assessment and assessment.get("severity_label", None):
|
|
378
389
|
result_lines.append(f"Category: {category}")
|
|
379
390
|
result_lines.append(f"Severity Level: {assessment.get('severity_label', '')}")
|
|
380
391
|
result_lines.append("")
|
|
381
|
-
|
|
382
|
-
return "\n".join(result_lines)
|
|
392
|
+
|
|
393
|
+
return "\n".join(result_lines)
|
|
@@ -11,11 +11,16 @@ import asyncio
|
|
|
11
11
|
import re
|
|
12
12
|
from typing import Dict, Optional, Any, Tuple, List
|
|
13
13
|
from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
|
|
14
|
-
from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
|
|
14
|
+
from azure.ai.evaluation.simulator._model_tools._generated_rai_client import (
|
|
15
|
+
GeneratedRAIClient,
|
|
16
|
+
)
|
|
15
17
|
from pyrit.models import PromptRequestResponse, construct_response_from_request
|
|
16
18
|
from pyrit.prompt_target import PromptChatTarget
|
|
17
|
-
|
|
18
|
-
from .metric_mapping import get_metric_from_risk_category
|
|
19
|
+
|
|
20
|
+
from .metric_mapping import (
|
|
21
|
+
get_metric_from_risk_category,
|
|
22
|
+
get_annotation_task_from_risk_category,
|
|
23
|
+
)
|
|
19
24
|
from .._attack_objective_generator import RiskCategory
|
|
20
25
|
|
|
21
26
|
|
|
@@ -23,13 +28,13 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
|
|
|
23
28
|
"""A class to handle chat-based interactions with the RAI service for evaluation purposes."""
|
|
24
29
|
|
|
25
30
|
def __init__(
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
self,
|
|
32
|
+
credential: Any,
|
|
33
|
+
azure_ai_project,
|
|
34
|
+
risk_category: RiskCategory,
|
|
35
|
+
logger: Optional[logging.Logger] = None,
|
|
36
|
+
evaluator_name: Optional[str] = None,
|
|
37
|
+
) -> None:
|
|
33
38
|
"""Initialize the RAIServiceEvalChatTarget.
|
|
34
39
|
|
|
35
40
|
:param endpoint: The endpoint URL for the RAI service.
|
|
@@ -44,36 +49,38 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
|
|
|
44
49
|
self.credential = credential
|
|
45
50
|
self.azure_ai_project = azure_ai_project
|
|
46
51
|
|
|
47
|
-
async def send_prompt_async(
|
|
52
|
+
async def send_prompt_async(
|
|
53
|
+
self, *, prompt_request: PromptRequestResponse, objective: str = ""
|
|
54
|
+
) -> PromptRequestResponse:
|
|
48
55
|
self.logger.info("Starting send_prompt_async operation")
|
|
49
56
|
self._validate_request(prompt_request=prompt_request)
|
|
50
|
-
|
|
51
|
-
thing_to_eval = prompt_request.request_pieces[0].to_dict()['original_value']
|
|
52
57
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
}
|
|
58
|
+
thing_to_eval = prompt_request.request_pieces[0].to_dict()["original_value"]
|
|
59
|
+
|
|
60
|
+
thing_to_eval_qr = {"query": "query", "response": thing_to_eval}
|
|
57
61
|
|
|
58
62
|
metric_name = get_metric_from_risk_category(self.risk_category)
|
|
59
|
-
|
|
60
|
-
evaluation_result = await evaluate_with_rai_service(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
63
|
+
annotation_task = get_annotation_task_from_risk_category(self.risk_category)
|
|
64
|
+
evaluation_result = await evaluate_with_rai_service(
|
|
65
|
+
thing_to_eval_qr,
|
|
66
|
+
metric_name=metric_name,
|
|
67
|
+
project_scope=self.azure_ai_project,
|
|
68
|
+
credential=self.credential,
|
|
69
|
+
annotation_task=annotation_task,
|
|
70
|
+
)
|
|
64
71
|
self.logger.info(f"Evaluation result: {evaluation_result}")
|
|
65
|
-
|
|
72
|
+
|
|
66
73
|
# Extract the relevant data from evaluation_result
|
|
67
74
|
score = evaluation_result.get(f"{metric_name}_score", 0)
|
|
68
75
|
reason = evaluation_result.get(f"{metric_name}_reason", "")
|
|
69
76
|
threshold = evaluation_result.get(f"{metric_name}_threshold", 3)
|
|
70
77
|
result = evaluation_result.get(metric_name, "")
|
|
71
|
-
|
|
78
|
+
|
|
72
79
|
# Calculate pass/fail using the same logic as in _evaluate_conversation
|
|
73
80
|
# Convert to boolean then to string for PyRIT's true/false score
|
|
74
81
|
# Ensure we're working with numeric values for comparison
|
|
75
82
|
boolean_result = float(score) > float(threshold)
|
|
76
|
-
|
|
83
|
+
|
|
77
84
|
# Format the response in a way that PyRIT's scorer can parse
|
|
78
85
|
# Use "true"/"false" string for score_value which is required for true_false score type
|
|
79
86
|
scorer_compatible_response = {
|
|
@@ -84,13 +91,13 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
|
|
|
84
91
|
"raw_score": score,
|
|
85
92
|
"threshold": threshold,
|
|
86
93
|
"result": result,
|
|
87
|
-
"risk_category": self.risk_category
|
|
88
|
-
}
|
|
94
|
+
"risk_category": self.risk_category,
|
|
95
|
+
},
|
|
89
96
|
}
|
|
90
|
-
|
|
97
|
+
|
|
91
98
|
# Convert to JSON string
|
|
92
99
|
response_json = json.dumps(scorer_compatible_response)
|
|
93
|
-
|
|
100
|
+
|
|
94
101
|
# Construct the response
|
|
95
102
|
response = construct_response_from_request(
|
|
96
103
|
request=prompt_request.request_pieces[0],
|
|
@@ -98,19 +105,18 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
|
|
|
98
105
|
)
|
|
99
106
|
self.logger.info(f"Constructed response: {response}")
|
|
100
107
|
return response
|
|
101
|
-
|
|
102
108
|
|
|
103
109
|
def is_json_response_supported(self) -> bool:
|
|
104
110
|
"""Check if JSON response is supported.
|
|
105
|
-
|
|
111
|
+
|
|
106
112
|
:return: True if JSON response is supported, False otherwise
|
|
107
113
|
"""
|
|
108
114
|
# This target supports JSON responses
|
|
109
115
|
return True
|
|
110
|
-
|
|
116
|
+
|
|
111
117
|
def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
|
|
112
118
|
"""Validate the request.
|
|
113
|
-
|
|
119
|
+
|
|
114
120
|
:param prompt_request: The prompt request
|
|
115
121
|
"""
|
|
116
122
|
if len(prompt_request.request_pieces) != 1:
|
|
@@ -118,4 +124,3 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
|
|
|
118
124
|
|
|
119
125
|
if prompt_request.request_pieces[0].converted_value_data_type != "text":
|
|
120
126
|
raise ValueError("This target only supports text prompt input.")
|
|
121
|
-
|