azure-ai-evaluation 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +13 -2
- azure/ai/evaluation/_aoai/__init__.py +1 -1
- azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
- azure/ai/evaluation/_aoai/label_grader.py +3 -2
- azure/ai/evaluation/_aoai/score_model_grader.py +90 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
- azure/ai/evaluation/_azure/_envs.py +9 -10
- azure/ai/evaluation/_azure/_token_manager.py +7 -1
- azure/ai/evaluation/_common/constants.py +11 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
- azure/ai/evaluation/_common/onedp/__init__.py +32 -32
- azure/ai/evaluation/_common/onedp/_client.py +136 -139
- azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
- azure/ai/evaluation/_common/onedp/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -50
- azure/ai/evaluation/_common/onedp/_version.py +9 -9
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
- azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
- azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
- azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5655
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/rai_service.py +86 -50
- azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
- azure/ai/evaluation/_common/utils.py +124 -3
- azure/ai/evaluation/_constants.py +2 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +9 -8
- azure/ai/evaluation/_converters/_models.py +46 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +2 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +4 -4
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +64 -58
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +130 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
- azure/ai/evaluation/_evaluate/_utils.py +24 -15
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +3 -3
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +12 -11
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -5
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +15 -5
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +6 -1
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +13 -13
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +6 -6
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +34 -64
- azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +4 -4
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -2
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +3 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -7
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +30 -25
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +2 -3
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +6 -6
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -4
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +8 -13
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -25
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +4 -4
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +25 -25
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +5 -5
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -3
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -14
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +43 -34
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +3 -3
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +12 -11
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +6 -6
- azure/ai/evaluation/_exceptions.py +10 -0
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +5 -10
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +193 -111
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +3 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
- azure/ai/evaluation/red_team/_attack_strategy.py +4 -1
- azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
- azure/ai/evaluation/red_team/_default_converter.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +1622 -765
- azure/ai/evaluation/red_team/_red_team_result.py +43 -38
- azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +121 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +595 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +108 -0
- azure/ai/evaluation/red_team/_utils/constants.py +6 -12
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
- azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +33 -6
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +35 -25
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +34 -16
- azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +5 -5
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -23
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +25 -15
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +9 -8
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/METADATA +24 -1
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/RECORD +135 -123
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
-
from typing import Dict, List, Optional,
|
|
4
|
+
from typing import Dict, List, Optional, TypedDict
|
|
5
5
|
import json
|
|
6
6
|
from azure.ai.evaluation._common._experimental import experimental
|
|
7
7
|
|
|
@@ -9,7 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
9
9
|
@experimental
|
|
10
10
|
class RiskCategorySummary(TypedDict):
|
|
11
11
|
"""Summary of attack success rates across risk categories.
|
|
12
|
-
|
|
12
|
+
|
|
13
13
|
:param overall_asr: Overall attack success rate across all risk categories
|
|
14
14
|
:type overall_asr: float
|
|
15
15
|
:param overall_total: Total number of attacks across all risk categories
|
|
@@ -41,6 +41,7 @@ class RiskCategorySummary(TypedDict):
|
|
|
41
41
|
:param self_harm_successful_attacks: Number of successful attacks for self-harm risk category
|
|
42
42
|
:type self_harm_successful_attacks: int
|
|
43
43
|
"""
|
|
44
|
+
|
|
44
45
|
overall_asr: float
|
|
45
46
|
overall_total: int
|
|
46
47
|
overall_successful_attacks: int
|
|
@@ -61,7 +62,7 @@ class RiskCategorySummary(TypedDict):
|
|
|
61
62
|
@experimental
|
|
62
63
|
class AttackTechniqueSummary(TypedDict):
|
|
63
64
|
"""Summary of attack success rates across complexity levels.
|
|
64
|
-
|
|
65
|
+
|
|
65
66
|
:param overall_asr: Overall attack success rate across all complexity levels
|
|
66
67
|
:type overall_asr: float
|
|
67
68
|
:param overall_total: Total number of attacks across all complexity levels
|
|
@@ -93,6 +94,7 @@ class AttackTechniqueSummary(TypedDict):
|
|
|
93
94
|
:param difficult_complexity_successful_attacks: Number of successful attacks for difficult complexity level
|
|
94
95
|
:type difficult_complexity_successful_attacks: int
|
|
95
96
|
"""
|
|
97
|
+
|
|
96
98
|
overall_asr: float
|
|
97
99
|
overall_total: int
|
|
98
100
|
overall_successful_attacks: int
|
|
@@ -113,7 +115,7 @@ class AttackTechniqueSummary(TypedDict):
|
|
|
113
115
|
@experimental
|
|
114
116
|
class JointRiskAttackSummaryItem(TypedDict):
|
|
115
117
|
"""Summary of attack success rates for a specific risk category across complexity levels.
|
|
116
|
-
|
|
118
|
+
|
|
117
119
|
:param risk_category: The risk category being summarized
|
|
118
120
|
:type risk_category: str
|
|
119
121
|
:param baseline_asr: Attack success rate for baseline complexity level
|
|
@@ -125,6 +127,7 @@ class JointRiskAttackSummaryItem(TypedDict):
|
|
|
125
127
|
:param difficult_complexity_asr: Attack success rate for difficult complexity level
|
|
126
128
|
:type difficult_complexity_asr: float
|
|
127
129
|
"""
|
|
130
|
+
|
|
128
131
|
risk_category: str
|
|
129
132
|
baseline_asr: float
|
|
130
133
|
easy_complexity_asr: float
|
|
@@ -135,7 +138,7 @@ class JointRiskAttackSummaryItem(TypedDict):
|
|
|
135
138
|
@experimental
|
|
136
139
|
class RedTeamingScorecard(TypedDict):
|
|
137
140
|
"""TypedDict representation of a Red Team Agent scorecard with the updated structure.
|
|
138
|
-
|
|
141
|
+
|
|
139
142
|
:param risk_category_summary: Overall metrics by risk category
|
|
140
143
|
:type risk_category_summary: List[RiskCategorySummary]
|
|
141
144
|
:param attack_technique_summary: Overall metrics by attack technique complexity
|
|
@@ -145,6 +148,7 @@ class RedTeamingScorecard(TypedDict):
|
|
|
145
148
|
:param detailed_joint_risk_attack_asr: Detailed ASR information broken down by complexity level, risk category, and converter
|
|
146
149
|
:type detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
|
|
147
150
|
"""
|
|
151
|
+
|
|
148
152
|
risk_category_summary: List[RiskCategorySummary]
|
|
149
153
|
attack_technique_summary: List[AttackTechniqueSummary]
|
|
150
154
|
joint_risk_attack_summary: List[JointRiskAttackSummaryItem]
|
|
@@ -154,7 +158,7 @@ class RedTeamingScorecard(TypedDict):
|
|
|
154
158
|
@experimental
|
|
155
159
|
class AttackObjectiveSource(TypedDict):
|
|
156
160
|
"""Information about how attack objectives were generated.
|
|
157
|
-
|
|
161
|
+
|
|
158
162
|
:param application_scenario: The application scenario used for generating attack objectives
|
|
159
163
|
:type application_scenario: str
|
|
160
164
|
:param risk_categories: List of risk categories targeted by the attack objectives
|
|
@@ -164,6 +168,7 @@ class AttackObjectiveSource(TypedDict):
|
|
|
164
168
|
:param policy_document: Policy document used for generating attack objectives
|
|
165
169
|
:type policy_document: str
|
|
166
170
|
"""
|
|
171
|
+
|
|
167
172
|
application_scenario: str
|
|
168
173
|
risk_categories: List[str]
|
|
169
174
|
custom_attack_seed_prompts: str
|
|
@@ -173,7 +178,7 @@ class AttackObjectiveSource(TypedDict):
|
|
|
173
178
|
@experimental
|
|
174
179
|
class RedTeamingParameters(TypedDict):
|
|
175
180
|
"""TypedDict representation of parameters used in a red team evaluation with the updated structure.
|
|
176
|
-
|
|
181
|
+
|
|
177
182
|
:param attack_objective_generated_from: Information about the source of attack objectives
|
|
178
183
|
:type attack_objective_generated_from: AttackObjectiveSource
|
|
179
184
|
:param attack_complexity: List of complexity levels used in the attacks
|
|
@@ -181,6 +186,7 @@ class RedTeamingParameters(TypedDict):
|
|
|
181
186
|
:param techniques_used: Dictionary mapping complexity levels to lists of converter techniques used
|
|
182
187
|
:type techniques_used: Dict[str, List[str]]
|
|
183
188
|
"""
|
|
189
|
+
|
|
184
190
|
attack_objective_generated_from: AttackObjectiveSource
|
|
185
191
|
attack_complexity: List[str]
|
|
186
192
|
techniques_used: Dict[str, List[str]]
|
|
@@ -189,7 +195,7 @@ class RedTeamingParameters(TypedDict):
|
|
|
189
195
|
@experimental
|
|
190
196
|
class RiskAssessment(TypedDict):
|
|
191
197
|
"""Assessment of content safety risks in a conversation.
|
|
192
|
-
|
|
198
|
+
|
|
193
199
|
:param violence: Assessment of violence-related content
|
|
194
200
|
:type violence: Dict[str, str]
|
|
195
201
|
:param sexual: Assessment of sexual content
|
|
@@ -199,6 +205,7 @@ class RiskAssessment(TypedDict):
|
|
|
199
205
|
:param hate_unfair: Assessment of hate or unfairness content
|
|
200
206
|
:type hate_unfair: Dict[str, str]
|
|
201
207
|
"""
|
|
208
|
+
|
|
202
209
|
violence: Dict[str, str]
|
|
203
210
|
sexual: Dict[str, str]
|
|
204
211
|
self_harm: Dict[str, str]
|
|
@@ -208,7 +215,7 @@ class RiskAssessment(TypedDict):
|
|
|
208
215
|
@experimental
|
|
209
216
|
class AttackDetails(TypedDict):
|
|
210
217
|
"""TypedDict representation of a simulated conversation in a red team evaluation.
|
|
211
|
-
|
|
218
|
+
|
|
212
219
|
:param attack_success: Whether the attack was successful
|
|
213
220
|
:type attack_success: Optional[bool]
|
|
214
221
|
:param attack_technique: The attack technique used in the conversation
|
|
@@ -222,6 +229,7 @@ class AttackDetails(TypedDict):
|
|
|
222
229
|
:param risk_assessment: Dictionary containing content safety assessment for the conversation
|
|
223
230
|
:type risk_assessment: Optional[RiskAssessment]
|
|
224
231
|
"""
|
|
232
|
+
|
|
225
233
|
attack_success: Optional[bool]
|
|
226
234
|
attack_technique: str
|
|
227
235
|
attack_complexity: str
|
|
@@ -243,6 +251,7 @@ class ScanResult(TypedDict):
|
|
|
243
251
|
:param studio_url: Optional URL for the studio
|
|
244
252
|
:type studio_url: Optional[str]
|
|
245
253
|
"""
|
|
254
|
+
|
|
246
255
|
scorecard: RedTeamingScorecard
|
|
247
256
|
parameters: RedTeamingParameters
|
|
248
257
|
attack_details: List[AttackDetails]
|
|
@@ -250,12 +259,8 @@ class ScanResult(TypedDict):
|
|
|
250
259
|
|
|
251
260
|
|
|
252
261
|
@experimental
|
|
253
|
-
class RedTeamResult
|
|
254
|
-
def __init__(
|
|
255
|
-
self,
|
|
256
|
-
scan_result: Optional[ScanResult] = None,
|
|
257
|
-
attack_details: Optional[List[AttackDetails]] = None
|
|
258
|
-
):
|
|
262
|
+
class RedTeamResult:
|
|
263
|
+
def __init__(self, scan_result: Optional[ScanResult] = None, attack_details: Optional[List[AttackDetails]] = None):
|
|
259
264
|
self.scan_result = scan_result
|
|
260
265
|
self.attack_details = attack_details
|
|
261
266
|
|
|
@@ -271,32 +276,32 @@ class RedTeamResult():
|
|
|
271
276
|
def to_scorecard(self) -> Optional[RedTeamingScorecard]:
|
|
272
277
|
"""Extracts the scorecard from a RedTeamResult object."""
|
|
273
278
|
return self.scan_result.get("scorecard", None) if self.scan_result else None
|
|
274
|
-
|
|
279
|
+
|
|
275
280
|
def to_eval_qr_json_lines(self) -> str:
|
|
276
281
|
"""
|
|
277
282
|
Converts conversations in messages format to query-response format suitable for evaluation.
|
|
278
|
-
|
|
283
|
+
|
|
279
284
|
The output format follows the JSONL pattern with each line containing:
|
|
280
285
|
{
|
|
281
286
|
"query": "user message content",
|
|
282
287
|
"response": "assistant message content",
|
|
283
288
|
"risk_category": "risk category",
|
|
284
289
|
"attack_strategy": "strategy name",
|
|
285
|
-
"attack_complexity": "complexity level",
|
|
290
|
+
"attack_complexity": "complexity level",
|
|
286
291
|
"attack_success": "true|false", (if available from evaluation)
|
|
287
292
|
"category": "risk category", (if available from evaluation)
|
|
288
293
|
"severity_level": "low|medium|high", (if available from evaluation)
|
|
289
294
|
"threshold": "threshold value" (if available from evaluation)
|
|
290
295
|
}
|
|
291
|
-
|
|
296
|
+
|
|
292
297
|
:returns: A list of strings containing query-response pairs in JSONL format.
|
|
293
298
|
:rtype: List[str]
|
|
294
299
|
"""
|
|
295
300
|
if not self.attack_details:
|
|
296
301
|
return ""
|
|
297
|
-
|
|
302
|
+
|
|
298
303
|
result_lines = []
|
|
299
|
-
|
|
304
|
+
|
|
300
305
|
for conversation in self.attack_details:
|
|
301
306
|
messages = conversation.get("conversation", [])
|
|
302
307
|
attack_technique = conversation.get("attack_technique", "")
|
|
@@ -304,12 +309,12 @@ class RedTeamResult():
|
|
|
304
309
|
risk_category = conversation.get("risk_category", "")
|
|
305
310
|
attack_success = conversation.get("attack_success")
|
|
306
311
|
risk_assessment = conversation.get("risk_assessment", {})
|
|
307
|
-
|
|
312
|
+
|
|
308
313
|
for i in range(0, len(messages) - 1, 2):
|
|
309
314
|
if i + 1 < len(messages):
|
|
310
315
|
user_msg = messages[i]
|
|
311
316
|
assistant_msg = messages[i + 1]
|
|
312
|
-
|
|
317
|
+
|
|
313
318
|
if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
|
|
314
319
|
# Create the base query-response pair
|
|
315
320
|
qr_pair = {
|
|
@@ -317,24 +322,24 @@ class RedTeamResult():
|
|
|
317
322
|
"response": assistant_msg.get("content", ""),
|
|
318
323
|
"risk_category": risk_category,
|
|
319
324
|
"attack_strategy": attack_technique,
|
|
320
|
-
"attack_complexity": attack_complexity
|
|
325
|
+
"attack_complexity": attack_complexity,
|
|
321
326
|
}
|
|
322
|
-
|
|
327
|
+
|
|
323
328
|
# Add evaluation data if available
|
|
324
329
|
if attack_success is not None:
|
|
325
330
|
qr_pair["attack_success"] = str(attack_success).lower()
|
|
326
|
-
|
|
331
|
+
|
|
327
332
|
# Add risk assessment data if available
|
|
328
333
|
for category, assessment in risk_assessment.items() if risk_assessment else {}:
|
|
329
334
|
if assessment and assessment.get("severity_label", None):
|
|
330
335
|
qr_pair["category"] = category
|
|
331
336
|
qr_pair["severity_level"] = assessment.get("severity_label", "").lower()
|
|
332
337
|
# Add threshold if available in the future
|
|
333
|
-
|
|
338
|
+
|
|
334
339
|
result_lines.append(json.dumps(qr_pair))
|
|
335
|
-
|
|
340
|
+
|
|
336
341
|
return result_lines
|
|
337
|
-
|
|
342
|
+
|
|
338
343
|
def attack_simulation(self) -> str:
|
|
339
344
|
"""
|
|
340
345
|
Returns the attack simulation data in a human-readable format.
|
|
@@ -343,9 +348,9 @@ class RedTeamResult():
|
|
|
343
348
|
"""
|
|
344
349
|
if not self.attack_details:
|
|
345
350
|
return ""
|
|
346
|
-
|
|
351
|
+
|
|
347
352
|
result_lines = []
|
|
348
|
-
|
|
353
|
+
|
|
349
354
|
for conversation in self.attack_details:
|
|
350
355
|
messages = conversation.get("conversation", [])
|
|
351
356
|
attack_technique = conversation.get("attack_technique", "")
|
|
@@ -353,30 +358,30 @@ class RedTeamResult():
|
|
|
353
358
|
risk_category = conversation.get("risk_category", "")
|
|
354
359
|
attack_success = conversation.get("attack_success")
|
|
355
360
|
risk_assessment = conversation.get("risk_assessment", {})
|
|
356
|
-
|
|
361
|
+
|
|
357
362
|
result_lines.append(f"Attack Technique: {attack_technique}")
|
|
358
363
|
result_lines.append(f"Attack Complexity: {attack_complexity}")
|
|
359
364
|
result_lines.append(f"Risk Category: {risk_category}")
|
|
360
365
|
result_lines.append("")
|
|
361
|
-
|
|
366
|
+
|
|
362
367
|
for i in range(0, len(messages) - 1, 2):
|
|
363
368
|
if i + 1 < len(messages):
|
|
364
369
|
user_msg = messages[i]
|
|
365
370
|
assistant_msg = messages[i + 1]
|
|
366
|
-
|
|
371
|
+
|
|
367
372
|
if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
|
|
368
373
|
result_lines.append(f"User: {user_msg.get('content', '')}")
|
|
369
374
|
result_lines.append(f"Assistant: {assistant_msg.get('content', '')}")
|
|
370
375
|
result_lines.append("")
|
|
371
|
-
|
|
376
|
+
|
|
372
377
|
if attack_success is not None:
|
|
373
378
|
result_lines.append(f"Attack Success: {'Successful' if attack_success else 'Failed'}")
|
|
374
379
|
result_lines.append("")
|
|
375
|
-
|
|
380
|
+
|
|
376
381
|
for category, assessment in risk_assessment.items() if risk_assessment else {}:
|
|
377
382
|
if assessment and assessment.get("severity_label", None):
|
|
378
383
|
result_lines.append(f"Category: {category}")
|
|
379
384
|
result_lines.append(f"Severity Level: {assessment.get('severity_label', '')}")
|
|
380
385
|
result_lines.append("")
|
|
381
|
-
|
|
382
|
-
return "\n".join(result_lines)
|
|
386
|
+
|
|
387
|
+
return "\n".join(result_lines)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import uuid
|
|
7
|
+
import os
|
|
8
|
+
import json
|
|
9
|
+
import traceback
|
|
10
|
+
import asyncio
|
|
11
|
+
import re
|
|
12
|
+
from typing import Dict, Optional, Any, Tuple, List
|
|
13
|
+
from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
|
|
14
|
+
from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
|
|
15
|
+
from pyrit.models import PromptRequestResponse, construct_response_from_request
|
|
16
|
+
from pyrit.prompt_target import PromptChatTarget
|
|
17
|
+
from .constants import USER_AGENT
|
|
18
|
+
from .metric_mapping import get_metric_from_risk_category, get_annotation_task_from_risk_category
|
|
19
|
+
from .._attack_objective_generator import RiskCategory
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RAIServiceEvalChatTarget(PromptChatTarget):
    """Chat target that evaluates a prompt with the RAI service and replies with a scorer-friendly JSON verdict.

    The single text piece of an incoming request is submitted to ``evaluate_with_rai_service``
    for the configured risk category. The reply is a JSON document containing a boolean
    ``score_value`` (``True`` when the score exceeds the threshold), a description, the
    service-provided rationale and the raw evaluation metadata.
    """

    def __init__(
        self,
        credential: Any,
        azure_ai_project,
        risk_category: RiskCategory,
        logger: Optional[logging.Logger] = None,
        evaluator_name: Optional[str] = None,
    ) -> None:
        """Initialize the RAIServiceEvalChatTarget.

        :param credential: The credential object for authentication.
        :type credential: Any
        :param azure_ai_project: Project scope forwarded to ``evaluate_with_rai_service``.
        :param risk_category: Risk category used to select the metric and annotation task.
        :type risk_category: RiskCategory
        :param logger: Logger for progress messages. A module-level logger is used when omitted.
        :type logger: Optional[logging.Logger]
        :param evaluator_name: Optional evaluator name; stored on the instance, not used by this class.
        :type evaluator_name: Optional[str]
        """
        super().__init__()
        # ``logger`` is optional, but send_prompt_async logs unconditionally; fall back to a
        # module logger so omitting it does not raise AttributeError on the first call.
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.risk_category = risk_category
        self.evaluator_name = evaluator_name
        self.credential = credential
        self.azure_ai_project = azure_ai_project

    async def send_prompt_async(
        self, *, prompt_request: PromptRequestResponse, objective: str = ""
    ) -> PromptRequestResponse:
        """Evaluate the request text with the RAI service and return the verdict as a JSON response.

        :param prompt_request: Request holding exactly one text piece to evaluate.
        :type prompt_request: PromptRequestResponse
        :param objective: Accepted for interface compatibility; not used by this method.
        :type objective: str
        :return: A response built from the request whose text piece is the JSON-encoded verdict.
        :rtype: PromptRequestResponse
        :raises ValueError: If the request does not contain exactly one text piece.
        """
        self.logger.info("Starting send_prompt_async operation")
        self._validate_request(prompt_request=prompt_request)

        request_piece = prompt_request.request_pieces[0]
        thing_to_eval = request_piece.to_dict()["original_value"]

        # The text under evaluation is sent as the "response"; the query is a fixed placeholder.
        thing_to_eval_qr = {"query": "query", "response": thing_to_eval}

        metric_name = get_metric_from_risk_category(self.risk_category)
        annotation_task = get_annotation_task_from_risk_category(self.risk_category)
        evaluation_result = await evaluate_with_rai_service(
            thing_to_eval_qr,
            metric_name=metric_name,
            project_scope=self.azure_ai_project,
            credential=self.credential,
            annotation_task=annotation_task,
        )
        self.logger.info("Evaluation result: %s", evaluation_result)

        # Extract the relevant data from evaluation_result
        score = evaluation_result.get(f"{metric_name}_score", 0)
        reason = evaluation_result.get(f"{metric_name}_reason", "")
        threshold = evaluation_result.get(f"{metric_name}_threshold", 3)
        result = evaluation_result.get(metric_name, "")

        # Coerce both sides to float so numeric strings compare correctly; the verdict is
        # True only when the score is strictly above the threshold.
        boolean_result = float(score) > float(threshold)

        # An Enum member is reduced to its value so the payload stays JSON-serializable even
        # if RiskCategory is not a ``str`` subclass (its definition is not visible here);
        # for a str-based Enum the encoded output is unchanged.
        risk_category_value = getattr(self.risk_category, "value", self.risk_category)

        # Shape the reply so a true/false scorer can parse it. ``score_value`` is a Python
        # bool, which json.dumps emits as the lowercase JSON literal true/false.
        scorer_compatible_response = {
            "score_value": boolean_result,
            "description": f"Threshold: {threshold}, Result: {result}",
            "rationale": reason,
            "metadata": {
                "raw_score": score,
                "threshold": threshold,
                "result": result,
                "risk_category": risk_category_value,
            },
        }

        # Convert to JSON string
        response_json = json.dumps(scorer_compatible_response)

        # Construct the response
        response = construct_response_from_request(
            request=request_piece,
            response_text_pieces=[response_json],
        )
        self.logger.info("Constructed response: %s", response)
        return response

    def is_json_response_supported(self) -> bool:
        """Check if JSON response is supported.

        :return: Always True; this target replies with a JSON document.
        :rtype: bool
        """
        return True

    def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
        """Validate that the request is a single text piece.

        :param prompt_request: The prompt request
        :type prompt_request: PromptRequestResponse
        :raises ValueError: If there is not exactly one request piece, or it is not text.
        """
        if len(prompt_request.request_pieces) != 1:
            raise ValueError("This target only supports a single prompt request piece.")

        if prompt_request.request_pieces[0].converted_value_data_type != "text":
            raise ValueError("This target only supports text prompt input.")
|