azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (142) hide show
  1. azure/ai/evaluation/__init__.py +27 -1
  2. azure/ai/evaluation/_azure/_models.py +6 -6
  3. azure/ai/evaluation/_common/constants.py +6 -2
  4. azure/ai/evaluation/_common/rai_service.py +39 -5
  5. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  6. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  7. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  8. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  9. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  10. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  11. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  12. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  13. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  14. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  15. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  16. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  17. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  18. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  19. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  20. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  21. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  22. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  23. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  24. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
  25. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  26. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  27. azure/ai/evaluation/_common/utils.py +23 -3
  28. azure/ai/evaluation/_constants.py +7 -0
  29. azure/ai/evaluation/_converters/__init__.py +3 -0
  30. azure/ai/evaluation/_converters/_ai_services.py +804 -0
  31. azure/ai/evaluation/_converters/_models.py +302 -0
  32. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
  33. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
  34. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  35. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  36. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
  37. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
  38. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
  39. azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
  40. azure/ai/evaluation/_evaluate/_evaluate.py +109 -64
  41. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
  42. azure/ai/evaluation/_evaluate/_utils.py +3 -3
  43. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
  44. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  45. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
  46. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
  47. azure/ai/evaluation/_evaluators/_common/_base_eval.py +44 -4
  48. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +4 -2
  49. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +44 -5
  50. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
  51. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
  52. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
  53. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
  54. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
  55. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
  56. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
  57. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
  58. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
  59. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +22 -4
  60. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  61. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
  62. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
  63. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
  64. azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
  65. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
  66. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  67. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +158 -0
  68. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
  69. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
  70. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
  72. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
  73. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  74. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
  75. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
  76. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  77. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
  78. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
  79. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  80. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
  81. azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
  82. azure/ai/evaluation/_exceptions.py +5 -0
  83. azure/ai/evaluation/_legacy/__init__.py +3 -0
  84. azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
  85. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  86. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  87. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  88. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  89. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  90. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  91. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  92. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  93. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  94. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  95. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  96. azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
  97. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
  98. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  99. azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
  100. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
  101. azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
  102. azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
  103. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  104. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
  105. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  106. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
  107. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
  108. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  109. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  110. azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
  111. azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
  112. azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
  113. azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
  114. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  115. azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
  116. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  117. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
  118. azure/ai/evaluation/_version.py +1 -1
  119. azure/ai/evaluation/red_team/__init__.py +19 -0
  120. azure/ai/evaluation/red_team/_attack_objective_generator.py +195 -0
  121. azure/ai/evaluation/red_team/_attack_strategy.py +45 -0
  122. azure/ai/evaluation/red_team/_callback_chat_target.py +74 -0
  123. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  124. azure/ai/evaluation/red_team/_red_team.py +1887 -0
  125. azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  126. azure/ai/evaluation/red_team/_utils/__init__.py +3 -0
  127. azure/ai/evaluation/red_team/_utils/constants.py +65 -0
  128. azure/ai/evaluation/red_team/_utils/formatting_utils.py +165 -0
  129. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  130. azure/ai/evaluation/red_team/_utils/strategy_utils.py +192 -0
  131. azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
  132. azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
  133. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
  134. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
  135. azure/ai/evaluation/simulator/_simulator.py +1 -1
  136. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +80 -15
  137. azure_ai_evaluation-1.5.0.dist-info/RECORD +207 -0
  138. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +1 -1
  139. azure/ai/evaluation/simulator/_tracing.py +0 -89
  140. azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
  141. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
  142. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,382 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Dict, List, Optional, TypedDict
5
+ import json
6
+ from azure.ai.evaluation._common._experimental import experimental
7
+
8
+
9
@experimental
class RiskCategorySummary(TypedDict):
    """Attack success rate (ASR) figures broken down by risk category.

    The overall aggregate and each individual risk category are described by
    the same three entries: the success rate, the number of attacks and the
    number of attacks that succeeded.

    :param overall_asr: Attack success rate aggregated over every risk category
    :type overall_asr: float
    :param overall_total: Count of attacks over every risk category
    :type overall_total: int
    :param overall_successful_attacks: Count of successful attacks over every risk category
    :type overall_successful_attacks: int
    :param hate_unfairness_asr: Attack success rate within the hate/unfairness risk category
    :type hate_unfairness_asr: float
    :param hate_unfairness_total: Count of attacks within the hate/unfairness risk category
    :type hate_unfairness_total: int
    :param hate_unfairness_successful_attacks: Count of successful attacks within the hate/unfairness risk category
    :type hate_unfairness_successful_attacks: int
    :param violence_asr: Attack success rate within the violence risk category
    :type violence_asr: float
    :param violence_total: Count of attacks within the violence risk category
    :type violence_total: int
    :param violence_successful_attacks: Count of successful attacks within the violence risk category
    :type violence_successful_attacks: int
    :param sexual_asr: Attack success rate within the sexual risk category
    :type sexual_asr: float
    :param sexual_total: Count of attacks within the sexual risk category
    :type sexual_total: int
    :param sexual_successful_attacks: Count of successful attacks within the sexual risk category
    :type sexual_successful_attacks: int
    :param self_harm_asr: Attack success rate within the self-harm risk category
    :type self_harm_asr: float
    :param self_harm_total: Count of attacks within the self-harm risk category
    :type self_harm_total: int
    :param self_harm_successful_attacks: Count of successful attacks within the self-harm risk category
    :type self_harm_successful_attacks: int
    """

    # Aggregate over all risk categories
    overall_asr: float
    overall_total: int
    overall_successful_attacks: int

    # Hate / unfairness
    hate_unfairness_asr: float
    hate_unfairness_total: int
    hate_unfairness_successful_attacks: int

    # Violence
    violence_asr: float
    violence_total: int
    violence_successful_attacks: int

    # Sexual
    sexual_asr: float
    sexual_total: int
    sexual_successful_attacks: int

    # Self-harm
    self_harm_asr: float
    self_harm_total: int
    self_harm_successful_attacks: int
59
+
60
+
61
@experimental
class AttackTechniqueSummary(TypedDict):
    """Attack success rate (ASR) figures broken down by attack complexity level.

    The overall aggregate and each complexity level (baseline, easy, moderate,
    difficult) are described by the same three entries: the success rate, the
    number of attacks and the number of attacks that succeeded.

    :param overall_asr: Attack success rate aggregated over every complexity level
    :type overall_asr: float
    :param overall_total: Count of attacks over every complexity level
    :type overall_total: int
    :param overall_successful_attacks: Count of successful attacks over every complexity level
    :type overall_successful_attacks: int
    :param baseline_asr: Attack success rate at the baseline complexity level
    :type baseline_asr: float
    :param baseline_total: Count of attacks at the baseline complexity level
    :type baseline_total: int
    :param baseline_successful_attacks: Count of successful attacks at the baseline complexity level
    :type baseline_successful_attacks: int
    :param easy_complexity_asr: Attack success rate at the easy complexity level
    :type easy_complexity_asr: float
    :param easy_complexity_total: Count of attacks at the easy complexity level
    :type easy_complexity_total: int
    :param easy_complexity_successful_attacks: Count of successful attacks at the easy complexity level
    :type easy_complexity_successful_attacks: int
    :param moderate_complexity_asr: Attack success rate at the moderate complexity level
    :type moderate_complexity_asr: float
    :param moderate_complexity_total: Count of attacks at the moderate complexity level
    :type moderate_complexity_total: int
    :param moderate_complexity_successful_attacks: Count of successful attacks at the moderate complexity level
    :type moderate_complexity_successful_attacks: int
    :param difficult_complexity_asr: Attack success rate at the difficult complexity level
    :type difficult_complexity_asr: float
    :param difficult_complexity_total: Count of attacks at the difficult complexity level
    :type difficult_complexity_total: int
    :param difficult_complexity_successful_attacks: Count of successful attacks at the difficult complexity level
    :type difficult_complexity_successful_attacks: int
    """

    # Aggregate over all complexity levels
    overall_asr: float
    overall_total: int
    overall_successful_attacks: int

    # Baseline
    baseline_asr: float
    baseline_total: int
    baseline_successful_attacks: int

    # Easy
    easy_complexity_asr: float
    easy_complexity_total: int
    easy_complexity_successful_attacks: int

    # Moderate
    moderate_complexity_asr: float
    moderate_complexity_total: int
    moderate_complexity_successful_attacks: int

    # Difficult
    difficult_complexity_asr: float
    difficult_complexity_total: int
    difficult_complexity_successful_attacks: int
111
+
112
+
113
@experimental
class JointRiskAttackSummaryItem(TypedDict):
    """Per-complexity attack success rates (ASR) for a single risk category.

    :param risk_category: Name of the risk category this item summarizes
    :type risk_category: str
    :param baseline_asr: Attack success rate at the baseline complexity level
    :type baseline_asr: float
    :param easy_complexity_asr: Attack success rate at the easy complexity level
    :type easy_complexity_asr: float
    :param moderate_complexity_asr: Attack success rate at the moderate complexity level
    :type moderate_complexity_asr: float
    :param difficult_complexity_asr: Attack success rate at the difficult complexity level
    :type difficult_complexity_asr: float
    """

    risk_category: str

    # One success rate per complexity level
    baseline_asr: float
    easy_complexity_asr: float
    moderate_complexity_asr: float
    difficult_complexity_asr: float
133
+
134
+
135
@experimental
class RedTeamingScorecard(TypedDict):
    """Scorecard of a Red Team Agent run, expressed as a TypedDict.

    :param risk_category_summary: Aggregate metrics grouped by risk category
    :type risk_category_summary: List[RiskCategorySummary]
    :param attack_technique_summary: Aggregate metrics grouped by attack technique complexity
    :type attack_technique_summary: List[AttackTechniqueSummary]
    :param joint_risk_attack_summary: Metrics per risk category and complexity level
    :type joint_risk_attack_summary: List[JointRiskAttackSummaryItem]
    :param detailed_joint_risk_attack_asr: Attack success rates nested by complexity level, risk category, and converter
    :type detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
    """

    # Summaries
    risk_category_summary: List[RiskCategorySummary]
    attack_technique_summary: List[AttackTechniqueSummary]
    joint_risk_attack_summary: List[JointRiskAttackSummaryItem]

    # Three-level nested mapping down to a single success rate
    detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
152
+
153
+
154
@experimental
class AttackObjectiveSource(TypedDict):
    """Describes the inputs from which attack objectives were generated.

    :param application_scenario: Application scenario the attack objectives were generated for
    :type application_scenario: str
    :param risk_categories: Risk categories the attack objectives target
    :type risk_categories: List[str]
    :param custom_attack_seed_prompts: Custom prompts that seeded attack objective generation
    :type custom_attack_seed_prompts: str
    :param policy_document: Policy document the attack objectives were generated from
    :type policy_document: str
    """

    application_scenario: str
    risk_categories: List[str]
    custom_attack_seed_prompts: str
    policy_document: str
171
+
172
+
173
@experimental
class RedTeamingParameters(TypedDict):
    """Parameters of a red team evaluation run, expressed as a TypedDict.

    :param attack_objective_generated_from: Where the attack objectives came from
    :type attack_objective_generated_from: AttackObjectiveSource
    :param attack_complexity: Complexity levels exercised by the attacks
    :type attack_complexity: List[str]
    :param techniques_used: Converter techniques used, keyed by complexity level
    :type techniques_used: Dict[str, List[str]]
    """

    attack_objective_generated_from: AttackObjectiveSource
    attack_complexity: List[str]
    techniques_used: Dict[str, List[str]]
187
+
188
+
189
@experimental
class RiskAssessment(TypedDict):
    """Content safety assessment of a conversation, one entry per risk area.

    :param violence: Assessment of violence-related content
    :type violence: Dict[str, str]
    :param sexual: Assessment of sexual content
    :type sexual: Dict[str, str]
    :param self_harm: Assessment of self-harm content
    :type self_harm: Dict[str, str]
    :param hate_unfair: Assessment of hate or unfairness content
    :type hate_unfair: Dict[str, str]
    """

    # Each value is a string-to-string mapping holding that area's assessment
    violence: Dict[str, str]
    sexual: Dict[str, str]
    self_harm: Dict[str, str]
    hate_unfair: Dict[str, str]
206
+
207
+
208
@experimental
class AttackDetails(TypedDict):
    """One simulated conversation from a red team evaluation, expressed as a TypedDict.

    :param attack_success: Whether the attack succeeded
    :type attack_success: Optional[bool]
    :param attack_technique: Attack technique applied in the conversation
    :type attack_technique: str
    :param attack_complexity: Complexity level of the attack
    :type attack_complexity: str
    :param risk_category: Risk category the attack targeted
    :type risk_category: str
    :param conversation: Messages exchanged during the conversation
    :type conversation: List[Dict[str, str]]
    :param risk_assessment: Content safety assessment of the conversation
    :type risk_assessment: Optional[RiskAssessment]
    """

    # Outcome
    attack_success: Optional[bool]

    # Attack description
    attack_technique: str
    attack_complexity: str
    risk_category: str

    # Transcript and its assessment
    conversation: List[Dict[str, str]]
    risk_assessment: Optional[RiskAssessment]
231
+
232
+
233
@experimental
class ScanResult(TypedDict):
    """Result of a Red Team Agent evaluation, expressed as a TypedDict.

    :param scorecard: Summary and detailed attack success rate information
    :type scorecard: RedTeamingScorecard
    :param parameters: Metadata describing the evaluation run
    :type parameters: RedTeamingParameters
    :param attack_details: The conversations of the evaluation, one AttackDetails per conversation
    :type attack_details: List[AttackDetails]
    :param studio_url: Optional URL for the studio
    :type studio_url: Optional[str]
    """

    scorecard: RedTeamingScorecard
    parameters: RedTeamingParameters
    attack_details: List[AttackDetails]
    studio_url: Optional[str]
250
+
251
+
252
@experimental
class RedTeamResult():
    """Holds the outcome of a red team scan and converts it to other formats.

    :param scan_result: The full scan result, if one is available
    :type scan_result: Optional[ScanResult]
    :param attack_details: The attack conversations used by the conversion helpers
    :type attack_details: Optional[List[AttackDetails]]
    """

    def __init__(
        self,
        scan_result: Optional[ScanResult] = None,
        attack_details: Optional[List[AttackDetails]] = None
    ):
        self.scan_result = scan_result
        self.attack_details = attack_details

    @staticmethod
    def _user_assistant_pairs(messages: List[Dict[str, str]]):
        """Yield ``(user_msg, assistant_msg)`` for each user/assistant turn.

        Messages are consumed two at a time (0-1, 2-3, ...). A pair is yielded
        only when its first message has role ``"user"`` and its second has role
        ``"assistant"``; a trailing unpaired message is ignored.
        """
        for start in range(0, len(messages) - 1, 2):
            user_msg, assistant_msg = messages[start], messages[start + 1]
            if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
                yield user_msg, assistant_msg

    def to_json(self) -> str:
        """
        Serializes the scan result of this RedTeamResult to a JSON string.

        :returns: The scan result in JSON format, or an empty string when there is no scan result.
        :rtype: str
        """
        return json.dumps(self.scan_result) if self.scan_result else ""

    def to_scorecard(self) -> Optional[RedTeamingScorecard]:
        """Extracts the scorecard from a RedTeamResult object.

        :returns: The ``"scorecard"`` entry of the scan result, or None when it is unavailable.
        :rtype: Optional[RedTeamingScorecard]
        """
        return self.scan_result.get("scorecard", None) if self.scan_result else None

    def to_eval_qr_json_lines(self) -> List[str]:
        """
        Converts conversations in messages format to query-response format suitable for evaluation.

        Each returned string is one JSON object (one JSONL line) of the form:
        {
            "query": "user message content",
            "response": "assistant message content",
            "risk_category": "risk category",
            "attack_strategy": "strategy name",
            "attack_complexity": "complexity level",
            "attack_success": "true|false", (only when attack_success is not None)
            "category": "risk category", (only when a severity label is available)
            "severity_level": "low|medium|high" (only when a severity label is available)
        }

        When several assessed categories carry a severity label, the last one
        encountered determines "category" and "severity_level". No threshold
        value is emitted.

        :returns: One JSON string per query-response pair; an empty list when there are no attack details.
        :rtype: List[str]
        """
        if not self.attack_details:
            # An empty list keeps the return type consistent with the non-empty case.
            return []

        result_lines = []

        for conversation in self.attack_details:
            messages = conversation.get("conversation") or []
            attack_success = conversation.get("attack_success")
            risk_assessment = conversation.get("risk_assessment") or {}

            base_fields = {
                "risk_category": conversation.get("risk_category", ""),
                "attack_strategy": conversation.get("attack_technique", ""),
                "attack_complexity": conversation.get("attack_complexity", ""),
            }

            # Evaluation data is the same for every pair in a conversation,
            # so it is computed once here rather than per message pair.
            evaluation_fields = {}
            if attack_success is not None:
                evaluation_fields["attack_success"] = str(attack_success).lower()
            for category, assessment in risk_assessment.items():
                if assessment and assessment.get("severity_label", None):
                    evaluation_fields["category"] = category
                    evaluation_fields["severity_level"] = assessment.get("severity_label", "").lower()

            for user_msg, assistant_msg in self._user_assistant_pairs(messages):
                qr_pair = {
                    "query": user_msg.get("content", ""),
                    "response": assistant_msg.get("content", ""),
                    **base_fields,
                    **evaluation_fields,
                }
                result_lines.append(json.dumps(qr_pair))

        return result_lines

    def attack_simulation(self) -> str:
        """
        Returns the attack simulation data in a human-readable format.

        For every conversation the output lists the attack technique, attack
        complexity and risk category, then each user/assistant exchange, then
        the attack outcome (when known) and every assessed category that has a
        severity label.

        :returns: A string containing the attack simulation data in a human-readable format;
            an empty string when there are no attack details.
        :rtype: str
        """
        if not self.attack_details:
            return ""

        result_lines = []

        for conversation in self.attack_details:
            messages = conversation.get("conversation") or []
            attack_success = conversation.get("attack_success")
            risk_assessment = conversation.get("risk_assessment") or {}

            result_lines.append(f"Attack Technique: {conversation.get('attack_technique', '')}")
            result_lines.append(f"Attack Complexity: {conversation.get('attack_complexity', '')}")
            result_lines.append(f"Risk Category: {conversation.get('risk_category', '')}")
            result_lines.append("")

            for user_msg, assistant_msg in self._user_assistant_pairs(messages):
                result_lines.append(f"User: {user_msg.get('content', '')}")
                result_lines.append(f"Assistant: {assistant_msg.get('content', '')}")
                result_lines.append("")

            if attack_success is not None:
                result_lines.append(f"Attack Success: {'Successful' if attack_success else 'Failed'}")
                result_lines.append("")

            for category, assessment in risk_assessment.items():
                if assessment and assessment.get("severity_label", None):
                    result_lines.append(f"Category: {category}")
                    result_lines.append(f"Severity Level: {assessment.get('severity_label', '')}")
                    result_lines.append("")

        return "\n".join(result_lines)
@@ -0,0 +1,3 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
@@ -0,0 +1,65 @@
1
+ """
2
+ Constants used in Red Team Agent.
3
+ """
4
+
5
+ import os
6
+ # Evaluators are imported directly from the public azure.ai.evaluation namespace
7
+ from azure.ai.evaluation import ViolenceEvaluator, HateUnfairnessEvaluator, SexualEvaluator, SelfHarmEvaluator
8
+ from .._attack_strategy import AttackStrategy
9
+ from .._attack_objective_generator import RiskCategory
10
+
11
# Baseline identifier
BASELINE_IDENTIFIER = "baseline"

# File extensions
DATA_EXT = ".jsonl"
RESULTS_EXT = ".json"

# Mapping of attack strategies (keyed by the string form of the strategy's
# value) to complexity levels. Pairs are listed in insertion order.
ATTACK_STRATEGY_COMPLEXITY_MAP = {
    str(strategy.value): complexity
    for strategy, complexity in (
        (AttackStrategy.Baseline, "baseline"),
        (AttackStrategy.AnsiAttack, "easy"),
        (AttackStrategy.AsciiArt, "easy"),
        (AttackStrategy.AsciiSmuggler, "easy"),
        (AttackStrategy.Atbash, "easy"),
        (AttackStrategy.Base64, "easy"),
        (AttackStrategy.Binary, "easy"),
        (AttackStrategy.Caesar, "easy"),
        (AttackStrategy.CharacterSpace, "easy"),
        (AttackStrategy.CharSwap, "easy"),
        (AttackStrategy.Diacritic, "easy"),
        (AttackStrategy.Flip, "easy"),
        (AttackStrategy.Leetspeak, "easy"),
        (AttackStrategy.Morse, "easy"),
        (AttackStrategy.ROT13, "easy"),
        (AttackStrategy.SuffixAppend, "easy"),
        (AttackStrategy.StringJoin, "easy"),
        (AttackStrategy.UnicodeConfusable, "easy"),
        (AttackStrategy.UnicodeSubstitution, "easy"),
        (AttackStrategy.Url, "easy"),
        (AttackStrategy.EASY, "easy"),
        (AttackStrategy.Tense, "moderate"),
        (AttackStrategy.MODERATE, "moderate"),
        (AttackStrategy.DIFFICULT, "difficult"),
        (AttackStrategy.Jailbreak, "easy"),
    )
}

# Mapping of risk categories to their evaluator classes
RISK_CATEGORY_EVALUATOR_MAP = {
    RiskCategory.Violence: ViolenceEvaluator,
    RiskCategory.HateUnfairness: HateUnfairnessEvaluator,
    RiskCategory.Sexual: SexualEvaluator,
    RiskCategory.SelfHarm: SelfHarmEvaluator,
}

# Timeout applied to internal tasks (unit not stated here; presumably seconds - confirm)
INTERNAL_TASK_TIMEOUT = 120

# Task status definitions: each upper-case name maps to its lower-case string
TASK_STATUS = {
    name: name.lower()
    for name in ("PENDING", "RUNNING", "COMPLETED", "FAILED", "TIMEOUT", "INCOMPLETE")
}
@@ -0,0 +1,165 @@
1
+ """
2
+ Utility functions for formatting, conversion, and processing in Red Team Agent.
3
+ """
4
+
5
+ import json
6
+ import pandas as pd
7
+ import math
8
+ from datetime import datetime
9
+ from typing import Dict, List, Union, Any, Optional, cast
10
+ from .._attack_strategy import AttackStrategy
11
+ from .._red_team_result import RedTeamResult
12
+ from pyrit.models import ChatMessage
13
+
14
+
15
def message_to_dict(message: ChatMessage) -> Dict[str, str]:
    """Build a plain dictionary from a chat message.

    :param message: The chat message whose ``role`` and ``content`` attributes are copied
    :type message: ChatMessage
    :return: A dictionary with exactly two keys, ``"role"`` and ``"content"``
    :rtype: Dict[str, str]
    """
    role, content = message.role, message.content
    return dict(role=role, content=content)
27
+
28
+
29
def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
    """Derive a display/lookup name from one strategy or a composed list of strategies.

    :param attack_strategy: A single attack strategy, or a list of strategies
    :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
    :return: ``str(value)`` of the single strategy, or the ``str(value)`` of every
        list member joined with ``"_"`` (an empty list yields an empty string)
    :rtype: str
    """
    # Single strategy: its name is simply the string form of its enum value.
    if not isinstance(attack_strategy, list):
        return str(attack_strategy.value)

    # Composed strategy: concatenate the member names in order.
    return "_".join(str(member.value) for member in attack_strategy)
41
+
42
+
43
def get_flattened_attack_strategies(
    attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]],
) -> List[Union[AttackStrategy, List[AttackStrategy]]]:
    """Expand complexity-group markers and de-duplicate the requested strategies.

    The group markers ``AttackStrategy.EASY``, ``AttackStrategy.MODERATE`` and
    ``AttackStrategy.DIFFICULT`` are replaced by the concrete strategies they
    stand for (appended after the explicitly requested ones, in that order).
    ``AttackStrategy.Baseline`` is always added. Duplicates are dropped, keeping
    the first occurrence; composed strategies (inner lists) stay lists and are
    compared by their ordered members. The input list is not modified.

    :param attack_strategies: List of attack strategies to flatten
    :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
    :return: Flattened, de-duplicated list of attack strategies
    :rtype: List[Union[AttackStrategy, List[AttackStrategy]]]
    """
    pending = list(attack_strategies)

    # NOTE: list.remove() only drops the first match, so a marker supplied more
    # than once used to survive into the result as if it were a concrete
    # strategy. Every occurrence of a marker is filtered out instead.
    if AttackStrategy.EASY in pending:
        pending = [entry for entry in pending if entry != AttackStrategy.EASY]
        pending.extend(
            [
                AttackStrategy.Base64,
                AttackStrategy.Flip,
                AttackStrategy.Morse,
            ]
        )

    if AttackStrategy.MODERATE in pending:
        pending = [entry for entry in pending if entry != AttackStrategy.MODERATE]
        pending.extend([AttackStrategy.Tense])

    if AttackStrategy.DIFFICULT in pending:
        pending = [entry for entry in pending if entry != AttackStrategy.DIFFICULT]
        pending.extend(
            [
                AttackStrategy.Compose(
                    [
                        AttackStrategy.Tense,
                        AttackStrategy.Base64,
                    ]
                ),
            ]
        )

    # Baseline is always included
    pending.append(AttackStrategy.Baseline)

    flattened_strategies = []
    seen_strategies = set()
    for strategy in pending:
        if isinstance(strategy, list):
            # Composed strategy: lists are unhashable, so track them as tuples.
            key = tuple(strategy)
            if key not in seen_strategies:
                seen_strategies.add(key)
                flattened_strategies.append(list(strategy))
        elif isinstance(strategy, AttackStrategy):
            if strategy not in seen_strategies:
                seen_strategies.add(strategy)
                flattened_strategies.append(strategy)
        # Entries of any other type are ignored, as before.

    return flattened_strategies
91
+
92
+
93
def get_attack_success(result: str) -> bool:
    """Tell whether an evaluation result string denotes a successful attack.

    The attack counts as successful when ``result`` equals the label stored under
    the key ``False`` in ``EVALUATION_PASS_FAIL_MAPPING``. If the mapping has no
    ``False`` key, ``result`` is compared against ``"true"`` instead.

    :param result: The evaluation result label to check
    :type result: str
    :return: True if the label marks the attack as successful, False otherwise
    :rtype: bool
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably this avoids a circular import with _red_team -- confirm.
    from .._red_team import EVALUATION_PASS_FAIL_MAPPING

    failure_label = EVALUATION_PASS_FAIL_MAPPING.get(False, "true")
    return result == failure_label
99
+
100
+
101
def format_scorecard(redteam_result: RedTeamResult) -> str:
    """Render the scorecard of a red-team result as a plain-text table.

    The output starts with the overall attack success rate (ASR) and the overall
    success count, followed by one table row per entry of the scorecard's
    ``joint_risk_attack_summary`` showing the baseline, easy, moderate and
    difficult complexity ASR. Values that are missing, ``None`` or NaN are shown
    as ``N/A``.

    :param redteam_result: The result to format; it is accessed like a mapping
        and must provide a ``"scorecard"`` entry
    :type redteam_result: RedTeamResult
    :return: A formatted scorecard as a string, lines joined with newlines
    :rtype: str
    """

    def as_percent(value):
        # One table cell: "N/A" for absent values, otherwise the value with a % sign.
        return "N/A" if is_none_or_nan(value) else f"{value}%"

    scorecard = redteam_result["scorecard"]
    category_summaries = scorecard["risk_category_summary"]
    # Only the first summary entry is used; fall back to defaults when there is none.
    risk_summary = category_summaries[0] if category_summaries else {}

    overall_asr = risk_summary.get("overall_asr", 0)
    overall_successes = risk_summary.get("overall_attack_successes", 0)
    overall_total = risk_summary.get("overall_total", 0)

    separator = "-" * 108
    lines = [
        f"Overall ASR: {overall_asr}%",
        f"Attack Success: {overall_successes}/{overall_total} attacks were successful",
        separator,
        f"{'Risk Category':<15}| {'Baseline ASR':<14} | {'Easy-Complexity Attacks ASR':<28} | {'Moderate-Complexity Attacks ASR':<30} | {'Difficult-Complexity Attacks ASR':<30}",
        separator,
    ]

    asr_keys = (
        "baseline_asr",
        "easy_complexity_asr",
        "moderate_complexity_asr",
        "difficult_complexity_asr",
    )
    for item in scorecard["joint_risk_attack_summary"]:
        risk_category = item["risk_category"].replace("_", "-").capitalize()
        baseline, easy, moderate, difficult = (as_percent(item.get(key)) for key in asr_keys)

        # The moderate header text is 31 characters wide (wider than its <30 pad),
        # so rows pad that column to 31 to stay aligned with the header.
        lines.append(f"{risk_category:<15}| {baseline:<14} | {easy:<28} | {moderate:<31} | {difficult:<30}")

    return "\n".join(lines)
143
+
144
+
145
def is_none_or_nan(value: Any) -> bool:
    """Report whether ``value`` is ``None`` or a floating-point NaN.

    Only ``float`` instances (including subclasses) are tested for NaN; any
    other non-``None`` value yields ``False``.

    :param value: The value to inspect
    :type value: Any
    :return: True if the value is None or NaN, False otherwise
    :rtype: bool
    """
    return value is None or (isinstance(value, float) and math.isnan(value))
152
+
153
+
154
def list_mean_nan_safe(data_list: List[Any]) -> float:
    """Average the usable entries of a list, skipping ``None`` and NaN.

    :param data_list: List of values to calculate mean for
    :type data_list: List[Any]
    :return: Arithmetic mean of the remaining values, or 0.0 when nothing
        remains after filtering
    :rtype: float
    """
    usable_values = list(filter(lambda entry: not is_none_or_nan(entry), data_list))
    return sum(usable_values) / len(usable_values) if usable_values else 0.0