azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +51 -6
- azure/ai/evaluation/_aoai/__init__.py +1 -1
- azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
- azure/ai/evaluation/_aoai/label_grader.py +3 -2
- azure/ai/evaluation/_aoai/python_grader.py +84 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +91 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
- azure/ai/evaluation/_azure/_envs.py +9 -10
- azure/ai/evaluation/_azure/_token_manager.py +7 -1
- azure/ai/evaluation/_common/constants.py +11 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
- azure/ai/evaluation/_common/onedp/__init__.py +32 -32
- azure/ai/evaluation/_common/onedp/_client.py +136 -139
- azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
- azure/ai/evaluation/_common/onedp/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -50
- azure/ai/evaluation/_common/onedp/_version.py +9 -9
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
- azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
- azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
- azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/rai_service.py +88 -52
- azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
- azure/ai/evaluation/_common/utils.py +188 -10
- azure/ai/evaluation/_constants.py +2 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +9 -8
- azure/ai/evaluation/_converters/_models.py +46 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +2 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +73 -25
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +210 -94
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +132 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
- azure/ai/evaluation/_evaluate/_utils.py +25 -17
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +4 -4
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +20 -12
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +6 -6
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +45 -11
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +28 -18
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +11 -8
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +11 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +12 -9
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -7
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +37 -64
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +5 -5
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +4 -4
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +12 -8
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +31 -26
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -4
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +14 -7
- azure/ai/evaluation/_evaluators/_qa/_qa.py +5 -5
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +62 -15
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +21 -26
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +5 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +22 -22
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +7 -6
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +4 -4
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +27 -24
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +175 -183
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +99 -21
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +20 -12
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +10 -7
- azure/ai/evaluation/_exceptions.py +10 -0
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +117 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +33 -41
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +195 -111
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +3 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +1 -1
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
- azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
- azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
- azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
- azure/ai/evaluation/red_team/_default_converter.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +1947 -1040
- azure/ai/evaluation/red_team/_red_team_result.py +49 -38
- azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +39 -34
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
- azure/ai/evaluation/red_team/_utils/constants.py +1 -13
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
- azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +31 -4
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +31 -17
- azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +18 -6
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +30 -10
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +21 -8
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/METADATA +46 -3
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/RECORD +141 -136
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py

````diff
@@ -16,12 +16,13 @@ from semantic_kernel.functions import kernel_function
 from azure.ai.evaluation.red_team._agent._agent_tools import RedTeamToolProvider
 from azure.identity import DefaultAzureCredential
 
+
 class RedTeamPlugin:
     """
     A Semantic Kernel plugin that provides red teaming capabilities.
     This plugin wraps around the RedTeamToolProvider to provide red teaming functions
     as Semantic Kernel functions.
-
+
     Example:
         ```python
        # Method 1: Create a plugin with individual environment variables
@@ -29,7 +30,7 @@ class RedTeamPlugin:
            azure_ai_project_endpoint=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
            target_func=lambda x: "Target model response"
        )
-
+
        # Create a Semantic Kernel agent with the plugin
        agent = ChatCompletionAgent(
            service=service,
@@ -39,9 +40,15 @@ class RedTeamPlugin:
        )
        ```
     """
-
-    def __init__(
-        self, azure_ai_project_endpoint: str, target_func: Optional[Callable[[str], str]] = None, *, application_scenario: str = "", **kwargs):
+
+    def __init__(
+        self,
+        azure_ai_project_endpoint: str,
+        target_func: Optional[Callable[[str], str]] = None,
+        *,
+        application_scenario: str = "",
+        **kwargs,
+    ):
         """
         Initialize the RedTeamPlugin with the necessary configuration components.
 
@@ -49,31 +56,31 @@
         :param target_func: Optional function to call with prompts
         :param application_scenario: The application scenario for the tool provider
         """
-
+
         # Initialize credential and tool provider
         self.credential = DefaultAzureCredential()
         self.tool_provider = RedTeamToolProvider(
             azure_ai_project_endpoint=azure_ai_project_endpoint,
             credential=self.credential,
-            application_scenario=application_scenario
+            application_scenario=application_scenario,
         )
-
+
         # Store the target function
         self.target_function = target_func
-
+
         # Dictionary to store fetched prompts for reference
         self.fetched_prompts = {}
-
+
     @kernel_function(description="Fetch a harmful prompt for a specific risk category to test content filters")
     async def fetch_harmful_prompt(
-        self,
+        self,
         risk_category: Annotated[str, "The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')"],
         strategy: Annotated[str, "Attack strategy to use (e.g., 'baseline', 'jailbreak')"] = "baseline",
-        convert_with_strategy: Annotated[str, "Optional strategy to convert the prompt"] = ""
+        convert_with_strategy: Annotated[str, "Optional strategy to convert the prompt"] = "",
     ) -> Annotated[str, "A JSON string with the harmful prompt and metadata"]:
         """
         Fetch a harmful prompt for a specific risk category to test content filters.
-
+
         :param risk_category: The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')
         :param strategy: Attack strategy to use (e.g., 'baseline', 'jailbreak')
         :param convert_with_strategy: Optional strategy to convert the prompt
@@ -82,14 +89,12 @@
         # Convert empty string to None
         if not convert_with_strategy:
             convert_with_strategy = None
-
+
         # Directly await the async method instead of using asyncio.run()
         result = await self.tool_provider.fetch_harmful_prompt(
-            risk_category_text=risk_category,
-            strategy=strategy,
-            convert_with_strategy=convert_with_strategy
+            risk_category_text=risk_category, strategy=strategy, convert_with_strategy=convert_with_strategy
         )
-
+
         # Store the prompt for later conversion if successful
         if result["status"] == "success" and "prompt_id" in result:
             prompt_id = result["prompt_id"]
@@ -97,18 +102,18 @@
             self.fetched_prompts[prompt_id] = result["prompt"]
             # Also update the tool provider's cache
             self.tool_provider._fetched_prompts[prompt_id] = result["prompt"]
-
+
         return json.dumps(result)
-
+
     @kernel_function(description="Convert a prompt using a specified strategy")
     async def convert_prompt(
         self,
         prompt_or_id: Annotated[str, "Either a prompt text or a prompt ID from a previous fetch"],
-        strategy: Annotated[str, "The strategy to use for conversion"]
+        strategy: Annotated[str, "The strategy to use for conversion"],
     ) -> Annotated[str, "A JSON string with the original and converted prompt"]:
         """
         Convert a prompt or a previously fetched prompt ID using a specified strategy.
-
+
         :param prompt_or_id: Either a prompt text or a prompt ID from a previous fetch
         :param strategy: The strategy to use for conversion
         :return: A JSON string with the original and converted prompt
@@ -117,37 +122,31 @@
         if prompt_or_id in self.fetched_prompts:
             # Update the provider's cache
             self.tool_provider._fetched_prompts[prompt_or_id] = self.fetched_prompts[prompt_or_id]
-
+
         # Directly await the async method instead of using asyncio.run()
-        result = await self.tool_provider.convert_prompt(
-            prompt_or_id=prompt_or_id,
-            strategy=strategy
-        )
-
+        result = await self.tool_provider.convert_prompt(prompt_or_id=prompt_or_id, strategy=strategy)
+
         return json.dumps(result)
-
+
     @kernel_function(description="Get a harmful prompt for a specific risk category and optionally convert it")
     async def red_team_unified(
         self,
         category: Annotated[str, "The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')"],
-        strategy: Annotated[str, "Optional strategy to convert the prompt"] = ""
+        strategy: Annotated[str, "Optional strategy to convert the prompt"] = "",
     ) -> Annotated[str, "A JSON string with the harmful prompt and metadata"]:
         """
         Get a harmful prompt for a specific risk category and optionally convert it.
-
+
         :param category: The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')
         :param strategy: Optional strategy to convert the prompt
         :return: A JSON string with the harmful prompt and metadata
         """
         # Convert empty string to None
         strategy_param = strategy if strategy else None
-
+
         # Directly await the async method instead of using asyncio.run()
-        result = await self.tool_provider.red_team(
-            category=category,
-            strategy=strategy_param
-        )
-
+        result = await self.tool_provider.red_team(category=category, strategy=strategy_param)
+
         # Store the prompt for later conversion if it's a success and we didn't convert it
         if result["status"] == "success":
             if "prompt_id" in result and "prompt" in result:
@@ -155,30 +154,27 @@
                 self.fetched_prompts[prompt_id] = result["prompt"]
                 # Also update the tool provider's cache
                 self.tool_provider._fetched_prompts[prompt_id] = result["prompt"]
-
+
         return json.dumps(result)
-
+
     @kernel_function(description="Get a list of all available prompt conversion strategies")
     async def get_available_strategies(self) -> Annotated[str, "A JSON string with available conversion strategies"]:
         """
         Get a list of all available prompt conversion strategies.
-
+
         :return: A JSON string with available conversion strategies
         """
         # This method calls a synchronous function, but we make the method itself async
         # for consistency with the rest of the interface
         strategies = self.tool_provider.get_available_strategies()
-
-        return json.dumps({
-            "status": "success",
-            "available_strategies": strategies
-        })
-
+
+        return json.dumps({"status": "success", "available_strategies": strategies})
+
     @kernel_function(description="Explain the purpose and responsible use of red teaming tools")
     async def explain_purpose(self) -> Annotated[str, "A JSON string with information about red teaming tools"]:
         """
         Explain the purpose and responsible use of red teaming tools.
-
+
         :return: A JSON string with information about red teaming tools
         """
         # This method doesn't use any async functions, but we make it async
@@ -188,50 +184,45 @@
             "responsible_use": [
                 "Only use these tools on systems you have permission to test.",
                 "Use the results to improve system safety and robustness.",
-                "Do not use these tools to cause harm or bypass legitimate safety measures in production systems."
+                "Do not use these tools to cause harm or bypass legitimate safety measures in production systems.",
             ],
             "risk_categories": {
                 "violence": "Content related to physical harm, weapons, or dangerous activities",
                 "hate_unfairness": "Content related to discrimination, bias, or prejudice",
                 "sexual": "Explicit sexual content",
-                "self_harm": "Content related to suicide, self-injury, or harmful behaviors"
+                "self_harm": "Content related to suicide, self-injury, or harmful behaviors",
             },
-            "conversion_strategies": "Various encoding and obfuscation methods to test if harmful content can bypass detection"
+            "conversion_strategies": "Various encoding and obfuscation methods to test if harmful content can bypass detection",
         }
-
+
         return json.dumps(explanation)
-
+
     @kernel_function(description="Send a prompt to the target function and return the response")
     async def send_to_target(
-        self,
-        prompt: Annotated[str, "The prompt text to send to the target function"]
+        self, prompt: Annotated[str, "The prompt text to send to the target function"]
     ) -> Annotated[str, "A JSON string with the response from the target"]:
         """
         Send a prompt to the target function and return the response.
-
+
         :param prompt: The prompt text to send to the target function
         :return: A JSON string with the response from the target
         """
         # This method doesn't use any async functions, but we make it async
         # for consistency with the rest of the interface
         if self.target_function is None:
-            return json.dumps({
-                "status": "error",
-                "message": "Target function not initialized. Make sure to pass a target_func when initializing the plugin."
-            })
-
+            return json.dumps(
+                {
+                    "status": "error",
+                    "message": "Target function not initialized. Make sure to pass a target_func when initializing the plugin.",
+                }
+            )
+
         try:
             # Call the target function with the prompt
             response = self.target_function(prompt)
-
-            return json.dumps({
-                "status": "success",
-                "prompt": prompt,
-                "response": response
-            })
+
+            return json.dumps({"status": "success", "prompt": prompt, "response": response})
         except Exception as e:
-            return json.dumps({
-                "status": "error",
-                "message": f"Error calling target function: {str(e)}",
-                "prompt": prompt
-            })
+            return json.dumps(
+                {"status": "error", "message": f"Error calling target function: {str(e)}", "prompt": prompt}
+            )
````
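
The class docstring in this diff shows the intended wiring: construct `RedTeamPlugin` with a project endpoint and a target callable, then register it on a Semantic Kernel `ChatCompletionAgent`. A minimal sketch of that flow, assuming the Semantic Kernel 1.x agent API and an `AzureChatCompletion` service configured through environment variables; the agent name, instructions, and the echo-style `target_func` are illustrative placeholders, not taken from the diff:

```python
import os

from semantic_kernel.agents import ChatCompletionAgent
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

from azure.ai.evaluation.red_team._agent._semantic_kernel_plugin import RedTeamPlugin

# The target under test is any Callable[[str], str]; a real target would
# forward the prompt to your application and return its reply.
plugin = RedTeamPlugin(
    azure_ai_project_endpoint=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
    target_func=lambda prompt: "Target model response",  # illustrative stub
)

# The plugin's @kernel_function methods (fetch_harmful_prompt, convert_prompt,
# red_team_unified, send_to_target, ...) become tools the agent can call.
agent = ChatCompletionAgent(
    service=AzureChatCompletion(),  # assumes AZURE_OPENAI_* variables are set
    name="RedTeamAgent",
    instructions="Use the red teaming tools to probe the target system.",
    plugins=[plugin],
)
```
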
azure/ai/evaluation/red_team/_attack_objective_generator.py

```diff
@@ -13,10 +13,19 @@ from azure.ai.evaluation._common._experimental import experimental
 @experimental
 class RiskCategory(str, Enum):
     """Risk categories for attack objectives."""
+
     HateUnfairness = "hate_unfairness"
     Violence = "violence"
     Sexual = "sexual"
     SelfHarm = "self_harm"
+    ProtectedMaterial = "protected_material"
+    CodeVulnerability = "code_vulnerability"
+
+
+@experimental
+class _InternalRiskCategory(str, Enum):
+    ECI = "eci"
+
 
 class _AttackObjectiveGenerator:
     """Generator for creating attack objectives.
@@ -30,6 +39,7 @@ class _AttackObjectiveGenerator:
     :param custom_attack_seed_prompts: Path to a JSON file containing custom attack seed prompts (can be absolute or relative path)
     :type custom_attack_seed_prompts: Optional[str]
     """
+
     def __init__(
         self,
         risk_categories: Optional[List[RiskCategory]] = None,
@@ -42,49 +52,51 @@
         self.application_scenario = application_scenario
         self.custom_attack_seed_prompts = custom_attack_seed_prompts
         self.logger = logging.getLogger("_AttackObjectiveGenerator")
-
+
         # If custom_attack_seed_prompts is provided, validate and load them
         self.custom_prompts = None
         self.validated_prompts = []
         self.valid_prompts_by_category = {}
-
+
         if custom_attack_seed_prompts:
             self._load_and_validate_custom_prompts()
-
+
     def _load_and_validate_custom_prompts(self) -> None:
         """Load and validate custom attack seed prompts from the provided file path."""
         if not self.custom_attack_seed_prompts:
             return
-
+
         # Handle both absolute and relative paths
         custom_prompts_path = Path(self.custom_attack_seed_prompts)
-
+
         # Convert to absolute path if it's a relative path
         if not custom_prompts_path.is_absolute():
             self.logger.info(f"Converting relative path '{custom_prompts_path}' to absolute path")
             custom_prompts_path = Path.cwd() / custom_prompts_path
-
+
         self.logger.debug(f"Using absolute path: {custom_prompts_path}")
-
+
         # Check if the file exists
         if not custom_prompts_path.exists():
             raise ValueError(f"Custom attack seed prompts file not found: {custom_prompts_path}")
-
+
         try:
             # Load JSON file
-            with open(custom_prompts_path, 'r') as f:
+            with open(custom_prompts_path, "r", encoding="utf-8") as f:
                 self.custom_prompts = json.load(f)
-
+
             # Validate that it's a list
             if not isinstance(self.custom_prompts, list):
-                raise ValueError(
-                    f"Custom attack seed prompts must be a JSON array, got {type(self.custom_prompts)}, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                raise ValueError(
+                    f"Custom attack seed prompts must be a JSON array, got {type(self.custom_prompts)}, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                )
+
             self.logger.info(f"Loaded {len(self.custom_prompts)} prompts from {self.custom_attack_seed_prompts}")
-
+
             # Initialize dictionary for categorized prompts
             for risk_category in RiskCategory:
                 self.valid_prompts_by_category[risk_category.value] = []
-
+
             # Process each prompt and validate format
             valid_prompts_count = 0
             invalid_prompts_count = 0
@@ -94,102 +106,132 @@
                     if not isinstance(prompt, dict):
                         self.logger.warning(f"Skipping prompt {i}: not a JSON object")
                         continue
-
+
                     if "metadata" not in prompt:
                         self.logger.warning(f"Skipping prompt {i}: missing 'metadata' field")
                         continue
-
+
                     if "messages" not in prompt or not prompt["messages"]:
                         self.logger.warning(f"Skipping prompt {i}: missing or empty 'messages' field")
                         continue
-
+
                     # Check metadata structure
                     metadata = prompt["metadata"]
                     if not isinstance(metadata, dict):
-                        self.logger.warning(f"Skipping prompt {i}: 'metadata' is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: 'metadata' is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     if "target_harms" not in metadata or not metadata["target_harms"]:
-                        self.logger.warning(f"Skipping prompt {i}: missing or empty 'target_harms' in metadata, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: missing or empty 'target_harms' in metadata, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     # Check target_harms structure
                     valid_risk_types = {cat.value for cat in RiskCategory}
                     valid_risk_found = False
                     prompt_categories = []
-
+
                     for harm in metadata["target_harms"]:
                         if not isinstance(harm, dict):
-                            self.logger.warning(f"Skipping harm in prompt {i}: not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                            self.logger.warning(
+                                f"Skipping harm in prompt {i}: not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                            )
                             continue
-
+
                         if "risk-type" not in harm:
-                            self.logger.warning(f"Skipping harm in prompt {i}: missing 'risk-type' field, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                            self.logger.warning(
+                                f"Skipping harm in prompt {i}: missing 'risk-type' field, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                            )
                             continue
-
+
                         risk_type = harm.get("risk-type", "")
                         if risk_type not in valid_risk_types:
-                            self.logger.warning(f"Skipping harm in prompt {i}: invalid risk-type '{risk_type}'. Valid types: {valid_risk_types}. see https://aka.ms/airedteamingagent-howtodoc for more information")
+                            self.logger.warning(
+                                f"Skipping harm in prompt {i}: invalid risk-type '{risk_type}'. Valid types: {valid_risk_types}. see https://aka.ms/airedteamingagent-howtodoc for more information"
+                            )
                             continue
-
+
                         prompt_categories.append(risk_type)
                         valid_risk_found = True
-
+
                     if not valid_risk_found:
-                        self.logger.warning(f"Skipping prompt {i}: no valid risk types found. See https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: no valid risk types found. See https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     # Check messages structure
                     messages = prompt["messages"]
                     if not isinstance(messages, list) or not messages:
-                        self.logger.warning(f"Skipping prompt {i}: 'messages' is not a list or is empty, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: 'messages' is not a list or is empty, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     message = messages[0]
                     if not isinstance(message, dict):
-                        self.logger.warning(f"Skipping prompt {i}: first message is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: first message is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     if "role" not in message or message["role"] != "user":
-                        self.logger.warning(f"Skipping prompt {i}: first message must have role='user', see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: first message must have role='user', see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     if "content" not in message or not message["content"]:
-                        self.logger.warning(f"Skipping prompt {i}: first message missing or empty 'content', see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: first message missing or empty 'content', see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
-
+
                     # If we got here, the prompt is valid
                     self.validated_prompts.append(prompt)
                     valid_prompts_count += 1
-
+
                     # Add to the appropriate categories
                     for category in prompt_categories:
                         self.valid_prompts_by_category[category].append(prompt)
-
+
                 except Exception as e:
                     self.logger.warning(f"Error validating prompt {i}: {str(e)}")
                     invalid_prompts_count += 1
-
+
             # Check if we have at least one valid prompt
             if valid_prompts_count == 0:
-                raise ValueError(
-                    "No valid prompts found in custom attack seed prompts file. See https://aka.ms/airedteamingagent-howtodoc for more information")
+                raise ValueError(
+                    "No valid prompts found in custom attack seed prompts file. See https://aka.ms/airedteamingagent-howtodoc for more information"
+                )
+
             self.logger.info(f"Loaded {valid_prompts_count} valid prompts from custom attack seed prompts file")
-
+
             if invalid_prompts_count > 0:
                 self.logger.warning(f"Skipped {invalid_prompts_count} invalid prompts")
-
+
             # Log the breakdown by risk category
-            category_counts = {cat: len(prompts) for cat, prompts in self.valid_prompts_by_category.items() if len(prompts) > 0}
+            category_counts = {
+                cat: len(prompts) for cat, prompts in self.valid_prompts_by_category.items() if len(prompts) > 0
+            }
             self.logger.info(f"Prompt distribution by risk category: {category_counts}")
-
+
             # Automatically extract risk categories from valid prompts if not provided
             if not self.risk_categories:
                 categories_with_prompts = [cat for cat, prompts in self.valid_prompts_by_category.items() if prompts]
                 self.risk_categories = [RiskCategory(cat) for cat in categories_with_prompts]
-                self.logger.info(
-                    f"Automatically set risk categories based on valid prompts: {[cat.value for cat in self.risk_categories]}")
+                self.logger.info(
+                    f"Automatically set risk categories based on valid prompts: {[cat.value for cat in self.risk_categories]}"
+                )
+
         except json.JSONDecodeError as e:
-            raise ValueError(f"Failed to parse custom attack seed prompts file: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information")
+            raise ValueError(
+                f"Failed to parse custom attack seed prompts file: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information"
+            )
         except Exception as e:
-            raise ValueError(f"Error loading custom attack seed prompts: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information")
+            raise ValueError(
+                f"Error loading custom attack seed prompts: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information"
+            )
```
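
The validation loop above fixes the shape a custom attack seed prompts file must have: a JSON array of objects, each carrying `metadata.target_harms` (a list of objects whose `risk-type` is one of the `RiskCategory` values) and a `messages` list whose first entry has `role` set to `user` and a non-empty `content`. A minimal sketch that writes a file passing those checks; the file name and prompt text are placeholders:

```python
import json

# One seed prompt, tagged with a single risk category. Valid "risk-type"
# values are the RiskCategory members shown above: "hate_unfairness",
# "violence", "sexual", "self_harm", "protected_material", "code_vulnerability".
seed_prompts = [
    {
        "metadata": {"target_harms": [{"risk-type": "violence"}]},
        "messages": [{"role": "user", "content": "<attack seed prompt text>"}],
    }
]

with open("custom_attack_seed_prompts.json", "w", encoding="utf-8") as f:
    json.dump(seed_prompts, f, indent=2)
```

When `risk_categories` is not passed to `_AttackObjectiveGenerator`, the categories are inferred from whichever `risk-type` tags survive validation, as the last hunk shows.
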
azure/ai/evaluation/red_team/_attack_strategy.py

```diff
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 @experimental
 class AttackStrategy(Enum):
     """Strategies for attacks."""
+
     EASY = "easy"
     MODERATE = "moderate"
     DIFFICULT = "difficult"
@@ -42,6 +43,6 @@ class AttackStrategy(Enum):
         for item in items:
             if not isinstance(item, cls):
                 raise ValueError("All items must be instances of AttackStrategy")
-        if len(items) > 2:
+        if len(items) > 2:
             raise ValueError("Composed strategies must have at most 2 items")
         return items
```
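
The check in the second hunk backs strategy composition: every item must be an `AttackStrategy` member, and a composition may hold at most two of them. A small sketch against the `AttackStrategy.Compose` classmethod, assuming it wraps exactly the validation shown above:

```python
from azure.ai.evaluation.red_team import AttackStrategy

# Valid: up to two strategies may be chained into one composed strategy.
combo = AttackStrategy.Compose([AttackStrategy.Base64, AttackStrategy.ROT13])

# Invalid: a third item trips the len(items) > 2 check above.
try:
    AttackStrategy.Compose([AttackStrategy.Base64, AttackStrategy.ROT13, AttackStrategy.Morse])
except ValueError as err:
    print(err)  # Composed strategies must have at most 2 items
```
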
azure/ai/evaluation/red_team/_callback_chat_target.py

```diff
@@ -49,17 +49,12 @@ class _CallbackChatTarget(PromptChatTarget):
         logger.info(f"Sending the following prompt to the prompt target: {request}")
 
         # response_context contains "messages", "stream", "session_state, "context"
-        response_context = await self._callback(messages=messages, stream=self._stream, session_state=None, context=None)
+        response_context = await self._callback(messages=messages, stream=self._stream, session_state=None, context=None)  # type: ignore
 
         response_text = response_context["messages"][-1]["content"]
-        response_entry = construct_response_from_request(
-            request=request, response_text_pieces=[response_text]
-        )
-
-        logger.info(
-            "Received the following response from the prompt target"
-            + f"{response_text}"
-        )
+        response_entry = construct_response_from_request(request=request, response_text_pieces=[response_text])
+
+        logger.info("Received the following response from the prompt target" + f"{response_text}")
         return response_entry
 
     def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
```