azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (123) hide show
  1. azure/ai/evaluation/__init__.py +43 -1
  2. azure/ai/evaluation/_azure/_models.py +6 -6
  3. azure/ai/evaluation/_common/constants.py +6 -2
  4. azure/ai/evaluation/_common/rai_service.py +38 -4
  5. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  6. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  7. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  8. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  9. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  10. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  11. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  12. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  13. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  14. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  15. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  16. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  17. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  18. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  19. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  20. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  21. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  22. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  23. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  24. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
  25. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  26. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  27. azure/ai/evaluation/_common/utils.py +22 -2
  28. azure/ai/evaluation/_constants.py +7 -0
  29. azure/ai/evaluation/_converters/__init__.py +3 -0
  30. azure/ai/evaluation/_converters/_ai_services.py +804 -0
  31. azure/ai/evaluation/_converters/_models.py +302 -0
  32. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
  33. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
  34. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  35. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  36. azure/ai/evaluation/_evaluate/_evaluate.py +31 -2
  37. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
  38. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  39. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
  40. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
  41. azure/ai/evaluation/_evaluators/_common/_base_eval.py +43 -3
  42. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +3 -1
  43. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +43 -4
  44. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
  45. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
  46. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
  47. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
  48. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
  49. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
  50. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
  51. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
  52. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
  53. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +21 -3
  54. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  55. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
  56. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
  57. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
  58. azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
  59. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
  60. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  61. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +157 -0
  62. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
  63. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
  64. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
  65. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
  66. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
  67. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  68. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
  69. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
  70. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
  72. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
  73. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  74. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
  75. azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
  76. azure/ai/evaluation/_exceptions.py +5 -0
  77. azure/ai/evaluation/_legacy/__init__.py +3 -0
  78. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  79. azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
  80. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
  81. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  82. azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
  83. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
  84. azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
  85. azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
  86. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  87. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
  88. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  89. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
  90. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
  91. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  92. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  93. azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
  94. azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
  95. azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
  96. azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
  97. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  98. azure/ai/evaluation/_red_team/__init__.py +3 -0
  99. azure/ai/evaluation/_red_team/_attack_objective_generator.py +192 -0
  100. azure/ai/evaluation/_red_team/_attack_strategy.py +42 -0
  101. azure/ai/evaluation/_red_team/_callback_chat_target.py +74 -0
  102. azure/ai/evaluation/_red_team/_default_converter.py +21 -0
  103. azure/ai/evaluation/_red_team/_red_team.py +1858 -0
  104. azure/ai/evaluation/_red_team/_red_team_result.py +246 -0
  105. azure/ai/evaluation/_red_team/_utils/__init__.py +3 -0
  106. azure/ai/evaluation/_red_team/_utils/constants.py +64 -0
  107. azure/ai/evaluation/_red_team/_utils/formatting_utils.py +164 -0
  108. azure/ai/evaluation/_red_team/_utils/logging_utils.py +139 -0
  109. azure/ai/evaluation/_red_team/_utils/strategy_utils.py +188 -0
  110. azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
  111. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  112. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
  113. azure/ai/evaluation/_version.py +1 -1
  114. azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
  115. azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
  116. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
  117. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
  118. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/METADATA +69 -15
  119. azure_ai_evaluation-1.4.0.dist-info/RECORD +197 -0
  120. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/WHEEL +1 -1
  121. azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
  122. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/NOTICE.txt +0 -0
  123. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,246 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Dict, List, Optional, TypedDict
5
+ import json
6
+
7
class _RiskCategorySummary(TypedDict):
    """Attack success rate (ASR) summary broken down by risk category.

    Each scope -- ``overall`` plus the four risk categories -- contributes three keys:
    ``<scope>_asr`` (float), ``<scope>_total`` (int) and
    ``<scope>_successful_attacks`` (int).
    """

    # Across all risk categories
    overall_asr: float
    overall_total: int
    overall_successful_attacks: int

    # Hate / unfairness
    hate_unfairness_asr: float
    hate_unfairness_total: int
    hate_unfairness_successful_attacks: int

    # Violence
    violence_asr: float
    violence_total: int
    violence_successful_attacks: int

    # Sexual
    sexual_asr: float
    sexual_total: int
    sexual_successful_attacks: int

    # Self-harm
    self_harm_asr: float
    self_harm_total: int
    self_harm_successful_attacks: int
24
+
25
class _AttackTechniqueSummary(TypedDict):
    """Attack success rate (ASR) summary broken down by attack complexity level.

    Each scope -- ``overall``, ``baseline`` and the easy / moderate / difficult
    complexity levels -- contributes three keys: ``<scope>_asr`` (float),
    ``<scope>_total`` (int) and ``<scope>_successful_attacks`` (int).
    """

    # Across all complexity levels
    overall_asr: float
    overall_total: int
    overall_successful_attacks: int

    # Baseline
    baseline_asr: float
    baseline_total: int
    baseline_successful_attacks: int

    # Easy complexity
    easy_complexity_asr: float
    easy_complexity_total: int
    easy_complexity_successful_attacks: int

    # Moderate complexity
    moderate_complexity_asr: float
    moderate_complexity_total: int
    moderate_complexity_successful_attacks: int

    # Difficult complexity
    difficult_complexity_asr: float
    difficult_complexity_total: int
    difficult_complexity_successful_attacks: int
42
+
43
class _JointRiskAttackSummaryItem(TypedDict):
    """Attack success rates of one risk category at each complexity level."""

    # Name of the risk category this row describes
    risk_category: str

    # One ASR value per complexity level
    baseline_asr: float
    easy_complexity_asr: float
    moderate_complexity_asr: float
    difficult_complexity_asr: float
50
+
51
class _RedTeamingScorecard(TypedDict):
    """Scorecard of a Red Team Agent run.

    Four sections make up the scorecard:

    - ``risk_category_summary``: overall metrics per risk category
    - ``attack_technique_summary``: overall metrics per attack complexity level
    - ``joint_risk_attack_summary``: metrics per risk category and complexity level
    - ``detailed_joint_risk_attack_asr``: ASR values in a three-level nested mapping
      (described by the original documentation as complexity level, risk category
      and converter)
    """

    risk_category_summary: List[_RiskCategorySummary]
    attack_technique_summary: List[_AttackTechniqueSummary]
    joint_risk_attack_summary: List[_JointRiskAttackSummaryItem]
    detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
64
+
65
class _AttackObjectiveSource(TypedDict):
    """Describes the inputs from which the attack objectives were generated."""

    application_scenario: str
    risk_categories: List[str]
    custom_attack_seed_prompts: str
    policy_document: str
71
+
72
class _RedTeamingParameters(TypedDict):
    """Metadata describing how a red team evaluation run was configured.

    - ``attack_objective_generated_from``: where the attack objectives came from
    - ``attack_complexity``: complexity levels used in the attacks
      (e.g., "Easy", "Moderate", "Difficult")
    - ``techniques_used``: complexity level -> list of converter techniques used
    """

    attack_objective_generated_from: _AttackObjectiveSource
    attack_complexity: List[str]
    techniques_used: Dict[str, List[str]]
83
+
84
class _RiskAssessment(TypedDict):
    """Content safety assessment of a conversation, one string mapping per risk."""

    violence: Dict[str, str]
    sexual: Dict[str, str]
    self_harm: Dict[str, str]
    # NOTE(review): this key is "hate_unfair" while the summary types use
    # "hate_unfairness" -- confirm which spelling producers actually emit.
    hate_unfair: Dict[str, str]
90
+
91
class _Conversation(TypedDict):
    """One simulated conversation recorded during a red team evaluation.

    - ``attack_success``: whether the attack succeeded (may be ``None``)
    - ``attack_technique``: the attack technique used in the conversation
    - ``attack_complexity``: the complexity level of the attack
    - ``risk_category``: the risk category associated with the conversation
    - ``conversation``: the exchanged messages, each a string-to-string mapping
    - ``risk_assessment``: content safety assessment for the conversation (may be ``None``)
    """

    attack_success: Optional[bool]
    attack_technique: str
    attack_complexity: str
    risk_category: str
    conversation: List[Dict[str, str]]
    risk_assessment: Optional[_RiskAssessment]
107
+
108
class _RedTeamResult(TypedDict):
    """Complete result of a Red Team Agent evaluation.

    - ``redteaming_scorecard``: summary and detailed ASR information
    - ``redteaming_parameters``: metadata about the evaluation run
    - ``redteaming_data``: the ``_Conversation`` entries produced by the evaluation
    - ``studio_url``: optional URL string (may be ``None``)
    """

    redteaming_scorecard: _RedTeamingScorecard
    redteaming_parameters: _RedTeamingParameters
    redteaming_data: List[_Conversation]
    studio_url: Optional[str]
120
+
121
class RedTeamOutput():
    """Holds the outcome of a red team run and exposes export helpers.

    :param red_team_result: The full result dictionary, if available.
    :type red_team_result: Optional[_RedTeamResult]
    :param redteaming_data: The recorded conversations, if available.
    :type redteaming_data: Optional[List[_Conversation]]
    """

    def __init__(
        self,
        red_team_result: "Optional[_RedTeamResult]" = None,
        redteaming_data: "Optional[List[_Conversation]]" = None,
    ):
        self.red_team_result = red_team_result
        self.redteaming_data = redteaming_data

    @staticmethod
    def _user_assistant_turns(messages):
        """Yield ``(user, assistant)`` message pairs taken at positions (0, 1), (2, 3), ...

        A pair is skipped unless its first message has role ``"user"`` and its second
        has role ``"assistant"``. A trailing unpaired message is ignored.
        """
        for start in range(0, len(messages) - 1, 2):
            user_msg, assistant_msg = messages[start], messages[start + 1]
            if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
                yield user_msg, assistant_msg

    def to_json(self) -> str:
        """Serialize ``red_team_result`` to a JSON string.

        :returns: The result in JSON format, or an empty string when there is no result.
        :rtype: str
        """
        return json.dumps(self.red_team_result) if self.red_team_result else ""

    def to_scorecard(self) -> "Optional[_RedTeamingScorecard]":
        """Extract the scorecard from ``red_team_result``.

        :returns: The ``redteaming_scorecard`` entry, or ``None`` when there is no result
            or the entry is missing.
        :rtype: Optional[_RedTeamingScorecard]
        """
        return self.red_team_result.get("redteaming_scorecard", None) if self.red_team_result else None

    def to_eval_qr_json_lines(self) -> List[str]:
        """Convert conversations from messages format to query-response JSON lines.

        Each returned string is one JSON object of the form:
        {
            "query": "user message content",
            "response": "assistant message content",
            "risk_category": "risk category",
            "attack_strategy": "strategy name",
            "attack_complexity": "complexity level",
            "attack_success": "true|false", (if available from evaluation)
            "category": "risk category", (if available from evaluation)
            "severity_level": "low|medium|high" (if available from evaluation)
        }

        No ``threshold`` field is emitted at present. When several categories of the
        risk assessment carry a severity label, the last one wins.

        :returns: One JSON string per user/assistant pair. The list is empty when there
            is no conversation data.
        :rtype: List[str]
        """
        # Always return a list so the empty and non-empty cases share one type.
        if not self.redteaming_data:
            return []

        result_lines = []

        for conversation in self.redteaming_data:
            messages = conversation.get("conversation", [])
            attack_technique = conversation.get("attack_technique", "")
            attack_complexity = conversation.get("attack_complexity", "")
            risk_category = conversation.get("risk_category", "")
            attack_success = conversation.get("attack_success")
            risk_assessment = conversation.get("risk_assessment", {})

            # Evaluation fields are identical for every turn of a conversation,
            # so compute them once instead of once per message pair.
            evaluation_fields = {}
            if attack_success is not None:
                evaluation_fields["attack_success"] = str(attack_success).lower()
            for category, assessment in (risk_assessment or {}).items():
                if assessment and assessment.get("severity_label", None):
                    evaluation_fields["category"] = category
                    evaluation_fields["severity_level"] = assessment.get("severity_label", "").lower()

            for user_msg, assistant_msg in self._user_assistant_turns(messages):
                qr_pair = {
                    "query": user_msg.get("content", ""),
                    "response": assistant_msg.get("content", ""),
                    "risk_category": risk_category,
                    "attack_strategy": attack_technique,
                    "attack_complexity": attack_complexity,
                }
                qr_pair.update(evaluation_fields)
                result_lines.append(json.dumps(qr_pair))

        return result_lines

    def attack_simulation(self) -> str:
        """Render the attack simulation data in a human-readable format.

        :returns: A newline-joined report, or an empty string when there is no
            conversation data.
        :rtype: str
        """
        if not self.redteaming_data:
            return ""

        result_lines = []

        for conversation in self.redteaming_data:
            messages = conversation.get("conversation", [])
            attack_technique = conversation.get("attack_technique", "")
            attack_complexity = conversation.get("attack_complexity", "")
            risk_category = conversation.get("risk_category", "")
            attack_success = conversation.get("attack_success")
            risk_assessment = conversation.get("risk_assessment", {})

            result_lines.append(f"Attack Technique: {attack_technique}")
            result_lines.append(f"Attack Complexity: {attack_complexity}")
            result_lines.append(f"Risk Category: {risk_category}")
            result_lines.append("")

            for user_msg, assistant_msg in self._user_assistant_turns(messages):
                result_lines.append(f"User: {user_msg.get('content', '')}")
                result_lines.append(f"Assistant: {assistant_msg.get('content', '')}")
                result_lines.append("")

            if attack_success is not None:
                result_lines.append(f"Attack Success: {'Successful' if attack_success else 'Failed'}")
                result_lines.append("")

            for category, assessment in (risk_assessment or {}).items():
                if assessment and assessment.get("severity_label", None):
                    result_lines.append(f"Category: {category}")
                    result_lines.append(f"Severity Level: {assessment.get('severity_label', '')}")
                    result_lines.append("")

        return "\n".join(result_lines)
@@ -0,0 +1,3 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
@@ -0,0 +1,64 @@
1
+ """
2
+ Constants used in Red Team Agent.
3
+ """
4
+
5
+ import os
6
+ # Update imports to use direct import from azure.ai.evaluation
7
+ from azure.ai.evaluation import ViolenceEvaluator, HateUnfairnessEvaluator, SexualEvaluator, SelfHarmEvaluator
8
+ from .._attack_strategy import AttackStrategy
9
+ from .._attack_objective_generator import RiskCategory
10
+
11
# Baseline identifier and file extensions
BASELINE_IDENTIFIER = "baseline"
DATA_EXT = ".jsonl"
RESULTS_EXT = ".json"

# Mapping of attack strategies to complexity levels, keyed by the string form of
# each strategy's enum value. Entries are listed in their original declaration order.
ATTACK_STRATEGY_COMPLEXITY_MAP = {
    str(strategy.value): complexity
    for strategy, complexity in (
        (AttackStrategy.Baseline, "baseline"),
        (AttackStrategy.AnsiAttack, "easy"),
        (AttackStrategy.AsciiArt, "easy"),
        (AttackStrategy.AsciiSmuggler, "easy"),
        (AttackStrategy.Atbash, "easy"),
        (AttackStrategy.Base64, "easy"),
        (AttackStrategy.Binary, "easy"),
        (AttackStrategy.Caesar, "easy"),
        (AttackStrategy.CharacterSpace, "easy"),
        (AttackStrategy.CharSwap, "easy"),
        (AttackStrategy.Diacritic, "easy"),
        (AttackStrategy.Flip, "easy"),
        (AttackStrategy.Leetspeak, "easy"),
        (AttackStrategy.Morse, "easy"),
        (AttackStrategy.ROT13, "easy"),
        (AttackStrategy.SuffixAppend, "easy"),
        (AttackStrategy.StringJoin, "easy"),
        (AttackStrategy.UnicodeConfusable, "easy"),
        (AttackStrategy.UnicodeSubstitution, "easy"),
        (AttackStrategy.Url, "easy"),
        (AttackStrategy.EASY, "easy"),
        (AttackStrategy.Tense, "moderate"),
        (AttackStrategy.MODERATE, "moderate"),
        (AttackStrategy.DIFFICULT, "difficult"),
        (AttackStrategy.Jailbreak, "easy"),
    )
}

# Mapping of risk categories to their evaluator classes
RISK_CATEGORY_EVALUATOR_MAP = {
    RiskCategory.Violence: ViolenceEvaluator,
    RiskCategory.HateUnfairness: HateUnfairnessEvaluator,
    RiskCategory.Sexual: SexualEvaluator,
    RiskCategory.SelfHarm: SelfHarmEvaluator,
}

# Task timeout
# NOTE(review): presumably expressed in seconds -- confirm against the code that consumes it.
INTERNAL_TASK_TIMEOUT = 120

# Task status definitions: upper-case name -> lower-case status string
TASK_STATUS = {
    status.upper(): status
    for status in ("pending", "running", "completed", "failed", "timeout")
}
@@ -0,0 +1,164 @@
1
+ """
2
+ Utility functions for formatting, conversion, and processing in Red Team Agent.
3
+ """
4
+
5
+ import json
6
+ import pandas as pd
7
+ import math
8
+ from datetime import datetime
9
+ from typing import Dict, List, Union, Any, Optional, cast
10
+ from .._attack_strategy import AttackStrategy
11
+ from .._red_team_result import _RedTeamResult
12
+ from pyrit.models import ChatMessage
13
+
14
+
15
def message_to_dict(message: ChatMessage) -> Dict[str, str]:
    """Build a plain dictionary from a ChatMessage.

    :param message: The chat message to convert
    :type message: ChatMessage
    :return: Dictionary holding the message's ``role`` and ``content``
    :rtype: Dict[str, str]
    """
    return dict(role=message.role, content=message.content)
27
+
28
+
29
def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
    """Return the string name of an attack strategy or of a list of strategies.

    A single strategy is named by the string form of its ``value``. A list is named
    by joining the string forms of its members' values with underscores.

    :param attack_strategy: The attack strategy or list of strategies
    :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
    :return: A string name for the strategy
    :rtype: str
    """
    if not isinstance(attack_strategy, List):
        return str(attack_strategy.value)
    return "_".join(str(member.value) for member in attack_strategy)
41
+
42
+
43
def get_flattened_attack_strategies(attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]) -> List[Union[AttackStrategy, List[AttackStrategy]]]:
    """Expand the EASY / MODERATE / DIFFICULT group strategies and drop duplicates.

    Works on a copy of the input. Each group marker that is present has its concrete
    strategies appended and one occurrence of the marker removed. ``Baseline`` is then
    appended unconditionally. Finally, duplicates are removed while keeping
    first-seen order.

    :param attack_strategies: List of attack strategies to flatten
    :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
    :return: Flattened list of attack strategies
    :rtype: List[Union[AttackStrategy, List[AttackStrategy]]]
    """
    pending = attack_strategies.copy()

    if AttackStrategy.EASY in pending:
        pending += [AttackStrategy.Base64, AttackStrategy.Flip, AttackStrategy.Morse]
        pending.remove(AttackStrategy.EASY)

    if AttackStrategy.MODERATE in pending:
        # TODO: change these before merging.
        pending += [
            AttackStrategy.AsciiArt,
            AttackStrategy.Compose([AttackStrategy.Tense, AttackStrategy.Flip]),
        ]
        pending.remove(AttackStrategy.MODERATE)

    if AttackStrategy.DIFFICULT in pending:
        # TODO: change these before merging.
        pending += [
            AttackStrategy.Compose([AttackStrategy.Tense, AttackStrategy.Morse]),
            AttackStrategy.Compose([AttackStrategy.Morse, AttackStrategy.Base64]),
        ]
        pending.remove(AttackStrategy.DIFFICULT)

    # Baseline is always included
    pending.append(AttackStrategy.Baseline)

    unique = []
    already_seen = set()
    for candidate in pending:
        if isinstance(candidate, List):
            # Composed strategies are lists; use a tuple as the hashable key
            key = tuple(candidate)
            if key not in already_seen:
                unique.append(list(candidate))
                already_seen.add(key)
        elif isinstance(candidate, AttackStrategy):
            if candidate not in already_seen:
                unique.append(candidate)
                already_seen.add(candidate)

    return unique
101
+
102
+
103
def get_attack_success(result: str) -> bool:
    """Tell whether an evaluation result string denotes a successful attack.

    The attack counts as successful when ``result`` equals the value stored under the
    key ``False`` in EVALUATION_PASS_FAIL_MAPPING; ``"true"`` is used when that key
    is absent.

    :param result: The evaluation result to check
    :type result: str
    :return: True if the result denotes attack success
    :rtype: bool
    """
    # Imported at call time rather than at module import time.
    from .._red_team import EVALUATION_PASS_FAIL_MAPPING

    success_marker = EVALUATION_PASS_FAIL_MAPPING.get(False, "true")
    return result == success_marker
109
+
110
+
111
def format_scorecard(redteam_result: _RedTeamResult) -> str:
    """Render the scorecard of a _RedTeamResult as a text table.

    The first line shows the overall ASR, taken from the first entry of
    ``risk_category_summary`` (0 when that list is empty). Below it, one row per entry
    of ``joint_risk_attack_summary`` lists the ASR for each complexity level, with
    "N/A" where a value is absent.

    :param redteam_result: The _RedTeamResult object to format
    :type redteam_result: Dict[str, Any]
    :return: A formatted scorecard as a string
    :rtype: str
    """

    def _cell(entry, key):
        # "<value>%" when the key is present, otherwise "N/A"
        return f"{entry[key]}%" if key in entry else "N/A"

    scorecard = redteam_result["redteaming_scorecard"]
    category_summary = scorecard["risk_category_summary"]
    overall_asr = category_summary[0]["overall_asr"] if category_summary else 0

    rule = "-" * 108
    lines = [
        f"Overall ASR: {overall_asr}%",
        rule,
        f"{'Risk Category':<15}| {'Baseline ASR':<14} | {'Easy-Complexity Attacks ASR':<28} | {'Moderate-Complexity Attacks ASR':<30} | {'Difficult-Complexity Attacks ASR':<30}",
        rule,
    ]

    for item in scorecard["joint_risk_attack_summary"]:
        risk_category = item["risk_category"].replace("_", "-").capitalize()
        baseline = _cell(item, "baseline_asr")
        easy = _cell(item, "easy_complexity_asr")
        moderate = _cell(item, "moderate_complexity_asr")
        difficult = _cell(item, "difficult_complexity_asr")
        lines.append(f"{risk_category:<15}| {baseline:<14} | {easy:<28} | {moderate:<31} | {difficult:<30}")

    return "\n".join(lines)
142
+
143
+
144
def is_none_or_nan(value: Any) -> bool:
    """Return True when ``value`` is None or a float NaN, otherwise False.

    :param value: The value to check
    :type value: Any
    :return: Whether the value is None or NaN
    :rtype: bool
    """
    return value is None or (isinstance(value, float) and math.isnan(value))
151
+
152
+
153
def list_mean_nan_safe(data_list: List[Any]) -> float:
    """Compute the mean of a list after discarding None and NaN entries.

    :param data_list: List of values to calculate mean for
    :type data_list: List[Any]
    :return: Mean of the remaining values, or 0.0 if nothing remains after filtering
    :rtype: float
    """
    usable = [entry for entry in data_list if not is_none_or_nan(entry)]
    return sum(usable) / len(usable) if usable else 0.0
@@ -0,0 +1,139 @@
1
+ """
2
+ Logging utilities for Red Team Agent.
3
+
4
+ This module provides consistent logging configuration and helper functions
5
+ for logging throughout the Red Team Agent.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ from datetime import datetime
11
+
12
+
13
def setup_logger(logger_name="RedTeamLogger", output_dir=None):
    """Configure and return a logger instance for the Red Team Agent.

    Any handlers already attached to the logger are removed and closed, then two
    fresh handlers are installed:

    - File handler: writes all logs at DEBUG level to ``redteam.log``
    - Console handler: shows WARNING and above

    Calling this function repeatedly for the same logger name therefore always leaves
    exactly these two handlers attached.

    :param logger_name: Name to use for the logger
    :type logger_name: str
    :param output_dir: Directory to store log files in; it is created if missing.
        If None, logs are stored in the current directory.
    :type output_dir: Optional[str]
    :return: The configured logger instance
    :rtype: logging.Logger
    """
    # Format matches what's expected in test_setup_logger
    log_filename = "redteam.log"

    # If output directory is specified, create path with that directory
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        log_filepath = os.path.join(output_dir, log_filename)
    else:
        log_filepath = log_filename

    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)

    # Clear any existing handlers (in case logger was already configured).
    # Iterate over a snapshot: removeHandler mutates logger.handlers, and removing
    # while iterating the live list skips every other handler. Closing releases the
    # log file held by a previous file handler.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
        handler.close()

    # File handler - captures all logs at DEBUG level with detailed formatting
    file_handler = logging.FileHandler(log_filepath)
    file_handler.setLevel(logging.DEBUG)
    file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    file_handler.setFormatter(file_formatter)
    logger.addHandler(file_handler)

    # Console handler - shows only WARNING and above to reduce output but keep important messages
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.WARNING)
    console_formatter = logging.Formatter('%(levelname)s: %(message)s')
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # Don't propagate to root logger to avoid duplicate logs
    logger.propagate = False

    return logger
63
+
64
+
65
def log_section_header(logger, section_title):
    """Emit an upper-cased section title framed by two rules of 80 "=" characters.

    All three lines are logged at DEBUG level.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param section_title: The title of the section
    :type section_title: str
    """
    rule = "=" * 80
    for line in (rule, section_title.upper(), rule):
        logger.debug(line)
76
+
77
+
78
def log_subsection_header(logger, section_title):
    """Emit a subsection title framed by two rules of 60 "-" characters.

    All three lines are logged at DEBUG level; the title is logged unchanged.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param section_title: The title of the subsection
    :type section_title: str
    """
    rule = "-" * 60
    for line in (rule, section_title, rule):
        logger.debug(line)
89
+
90
+
91
def log_strategy_start(logger, strategy_name, risk_category):
    """Record, at INFO level, that processing of a strategy has begun.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param strategy_name: The name of the strategy
    :type strategy_name: str
    :param risk_category: The risk category being processed
    :type risk_category: str
    """
    announcement = f"Starting processing of {strategy_name} strategy for {risk_category} risk category"
    logger.info(announcement)
102
+
103
+
104
def log_strategy_completion(logger, strategy_name, risk_category, elapsed_time=None):
    """Log, at INFO level, the completion of a strategy processing.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param strategy_name: The name of the strategy
    :type strategy_name: str
    :param risk_category: The risk category being processed
    :type risk_category: str
    :param elapsed_time: The time taken to process, if available. The duration is
        appended to the message (two decimals) whenever a value is given, including 0.
    :type elapsed_time: Optional[float]
    """
    # Compare against None rather than truthiness so a measured 0.0 is still reported.
    if elapsed_time is not None:
        logger.info(f"Completed {strategy_name} strategy for {risk_category} risk category in {elapsed_time:.2f}s")
    else:
        logger.info(f"Completed {strategy_name} strategy for {risk_category} risk category")
120
+
121
+
122
def log_error(logger, message, exception=None, context=None):
    """Log a message at ERROR level, decorated with optional context and exception text.

    The context, when given, is prepended as ``[context] ``. The exception, when
    given, is appended as ``: <str(exception)>``.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param message: The error message
    :type message: str
    :param exception: The exception that was raised, if any
    :type exception: Exception
    :param context: Additional context about where the error occurred
    :type context: str
    """
    text = f"[{context}] {message}" if context else message
    if exception:
        text = f"{text}: {str(exception)}"
    logger.error(text)