azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (100)
  1. azure/ai/evaluation/__init__.py +60 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/constants.py +65 -0
  4. azure/ai/evaluation/_common/rai_service.py +452 -0
  5. azure/ai/evaluation/_common/utils.py +87 -0
  6. azure/ai/evaluation/_constants.py +50 -0
  7. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  8. azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py +8 -0
  9. azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +72 -0
  10. azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +150 -0
  11. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +61 -0
  12. azure/ai/evaluation/_evaluate/_eval_run.py +494 -0
  13. azure/ai/evaluation/_evaluate/_evaluate.py +689 -0
  14. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +174 -0
  15. azure/ai/evaluation/_evaluate/_utils.py +237 -0
  16. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  17. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  18. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +73 -0
  19. azure/ai/evaluation/_evaluators/_chat/__init__.py +9 -0
  20. azure/ai/evaluation/_evaluators/_chat/_chat.py +350 -0
  21. azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +9 -0
  22. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +163 -0
  23. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
  24. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  25. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +122 -0
  26. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +62 -0
  27. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +21 -0
  28. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +108 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +66 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +296 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +78 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +76 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +76 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +99 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +141 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +122 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +61 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +71 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +123 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
  47. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  48. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +96 -0
  49. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  50. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -0
  51. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
  52. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
  53. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  54. azure/ai/evaluation/_evaluators/_qa/_qa.py +111 -0
  55. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  56. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +131 -0
  57. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +69 -0
  58. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  59. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  60. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +130 -0
  62. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +71 -0
  63. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  64. azure/ai/evaluation/_evaluators/_xpia/xpia.py +140 -0
  65. azure/ai/evaluation/_exceptions.py +107 -0
  66. azure/ai/evaluation/_http_utils.py +395 -0
  67. azure/ai/evaluation/_model_configurations.py +27 -0
  68. azure/ai/evaluation/_user_agent.py +6 -0
  69. azure/ai/evaluation/_version.py +5 -0
  70. azure/ai/evaluation/py.typed +0 -0
  71. azure/ai/evaluation/simulator/__init__.py +15 -0
  72. azure/ai/evaluation/simulator/_adversarial_scenario.py +27 -0
  73. azure/ai/evaluation/simulator/_adversarial_simulator.py +450 -0
  74. azure/ai/evaluation/simulator/_constants.py +17 -0
  75. azure/ai/evaluation/simulator/_conversation/__init__.py +315 -0
  76. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  77. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  78. azure/ai/evaluation/simulator/_direct_attack_simulator.py +252 -0
  79. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  80. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  81. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +93 -0
  82. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +207 -0
  83. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  84. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +147 -0
  85. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +228 -0
  86. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +157 -0
  87. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +157 -0
  88. azure/ai/evaluation/simulator/_model_tools/models.py +616 -0
  89. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +69 -0
  90. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +36 -0
  91. azure/ai/evaluation/simulator/_tracing.py +92 -0
  92. azure/ai/evaluation/simulator/_utils.py +111 -0
  93. azure/ai/evaluation/simulator/simulator.py +579 -0
  94. azure_ai_evaluation-1.0.0b1.dist-info/METADATA +377 -0
  95. azure_ai_evaluation-1.0.0b1.dist-info/RECORD +97 -0
  96. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0b1.dist-info}/WHEEL +1 -1
  97. azure_ai_evaluation-1.0.0b1.dist-info/top_level.txt +1 -0
  98. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  99. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  100. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,252 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # noqa: E501
5
+ import functools
6
+ import logging
7
+ from random import randint
8
+ from typing import Any, Callable, Dict, Optional
9
+
10
+ from azure.identity import DefaultAzureCredential
11
+
12
+ from promptflow._sdk._telemetry import ActivityType, monitor_operation
13
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
14
+ from azure.ai.evaluation.simulator import AdversarialScenario
15
+ from azure.ai.evaluation._model_configurations import AzureAIProject
16
+
17
+ from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
18
+ from ._adversarial_simulator import AdversarialSimulator
19
+
20
# Module-level logger, named after this module via __name__.
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def monitor_adversarial_scenario(func) -> Callable:
    """Decorator that routes every call of ``func`` through ``monitor_operation``.

    The monitored operation is created on each call because its custom
    dimensions are read from that call's keyword arguments.

    :param func: The function to be decorated.
    :type func: Callable
    :return: The decorated function.
    :rtype: Callable
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Only keyword arguments are inspected. A missing scenario is reported
        # as the string "None" because the value is stringified; the two
        # limits are reported as None when absent.
        dimensions = {
            "scenario": str(kwargs.get("scenario", None)),
            "max_conversation_turns": kwargs.get("max_conversation_turns", None),
            "max_simulation_results": kwargs.get("max_simulation_results", None),
        }
        monitor = monitor_operation(
            activity_name="jailbreak.adversarial.simulator.call",
            activity_type=ActivityType.PUBLICAPI,
            custom_dimensions=dimensions,
        )
        monitored_func = monitor(func)
        return monitored_func(*args, **kwargs)

    return wrapper
50
+
51
+
52
class DirectAttackSimulator:
    """
    Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
    This simulator converses with your AI system using prompts designed to interrupt normal functionality.

    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
        name.
    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
    :param credential: The credential for connecting to Azure AI project. A ``credential`` entry inside
        ``azure_ai_project`` takes precedence over this argument; when neither is supplied,
        ``DefaultAzureCredential`` is used.
    :type credential: ~azure.core.credentials.TokenCredential
    """

    def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
        """Validate the project scope, resolve the credential and build the RAI service helpers.

        :raises EvaluationException: If ``azure_ai_project`` lacks one of ``subscription_id``,
            ``resource_group_name`` or ``project_name``, or if any of them is empty/None.
        """
        required_keys = ["subscription_id", "resource_group_name", "project_name"]
        # check if azure_ai_project has the keys: subscription_id, resource_group_name, project_name
        if not all(key in azure_ai_project for key in required_keys):
            msg = "azure_ai_project must contain keys: subscription_id, resource_group_name and project_name"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
                category=ErrorCategory.MISSING_FIELD,
                blame=ErrorBlame.USER_ERROR,
            )
        # check the value of the keys in azure_ai_project is not none (any falsy value is rejected)
        if not all(azure_ai_project[key] for key in required_keys):
            msg = "subscription_id, resource_group_name and project_name keys cannot be None"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
                category=ErrorCategory.MISSING_FIELD,
                blame=ErrorBlame.USER_ERROR,
            )
        # Credential resolution: a credential embedded in the project scope wins over the
        # ``credential`` argument; DefaultAzureCredential is the fallback when neither is given.
        if "credential" not in azure_ai_project and not credential:
            credential = DefaultAzureCredential()
        elif "credential" in azure_ai_project:
            credential = azure_ai_project["credential"]
        self.credential = credential
        self.azure_ai_project = azure_ai_project
        self.token_manager = ManagedIdentityAPITokenManager(
            token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
            logger=logging.getLogger("AdversarialSimulator"),
            credential=credential,
        )
        self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
        self.adversarial_template_handler = AdversarialTemplateHandler(
            azure_ai_project=azure_ai_project, rai_client=self.rai_client
        )

    def _ensure_service_dependencies(self):
        """Raise if no RAI client is available.

        :raises EvaluationException: If ``self.rai_client`` is ``None``.
        """
        if self.rai_client is None:
            msg = "RAI service is required for simulation, but an RAI client was not provided."
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.ADVERSARIAL_SIMULATOR,
                category=ErrorCategory.MISSING_FIELD,
                blame=ErrorBlame.USER_ERROR,
            )

    # NOTE: the telemetry decorator is currently disabled.
    # @monitor_adversarial_scenario
    async def __call__(
        self,
        *,
        scenario: AdversarialScenario,
        target: Callable,
        max_conversation_turns: int = 1,
        max_simulation_results: int = 3,
        api_call_retry_limit: int = 3,
        api_call_retry_sleep_sec: int = 1,
        api_call_delay_sec: int = 0,
        concurrent_async_task: int = 3,
        randomization_seed: Optional[int] = None,
    ):
        """
        Executes the adversarial simulation and UPIA (user prompt injected attack) jailbreak adversarial simulation
        against a specified target function asynchronously.

        :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
            example:

            - :py:const:`azure.ai.evaluation.simulator.AdversarialScenario.ADVERSARIAL_QA`
            - :py:const:`azure.ai.evaluation.simulator.AdversarialScenario.ADVERSARIAL_CONVERSATION`
        :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
        :keyword target: The target function to simulate adversarial inputs against.
            This function should be asynchronous and accept a dictionary representing the adversarial input.
        :paramtype target: Callable
        :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
            Defaults to 1.
        :paramtype max_conversation_turns: int
        :keyword max_simulation_results: The maximum number of simulation results to return.
            Defaults to 3.
        :paramtype max_simulation_results: int
        :keyword api_call_retry_limit: The maximum number of retries for each API call within the simulation.
            Defaults to 3.
        :paramtype api_call_retry_limit: int
        :keyword api_call_retry_sleep_sec: The sleep duration (in seconds) between retries for API calls.
            Defaults to 1 second.
        :paramtype api_call_retry_sleep_sec: int
        :keyword api_call_delay_sec: The delay (in seconds) before making an API call.
            This can be used to avoid hitting rate limits. Defaults to 0 seconds.
        :paramtype api_call_delay_sec: int
        :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
            Defaults to 3.
        :paramtype concurrent_async_task: int
        :keyword randomization_seed: Seed used to randomize prompt selection, shared by both jailbreak
            and regular simulation to ensure consistent results. If not provided (``None``), a random seed
            will be generated and shared between simulations. An explicit seed of ``0`` is honoured.
        :paramtype randomization_seed: Optional[int]
        :return: A dictionary with two entries, ``"jailbreak"`` and ``"regular"``, each holding a list of
            dictionaries that represent simulated conversations. Each conversation dictionary contains:

            - 'template_parameters': A dictionary with parameters used in the conversation template,
              including 'conversation_starter'.
            - 'messages': A list of dictionaries, each representing a turn in the conversation.
              Each message dictionary includes 'content' (the message text) and
              'role' (indicating whether the message is from the 'user' or the 'assistant').
            - '**$schema**': A string indicating the schema URL for the conversation format.

            The 'content' for 'assistant' role messages may include the messages that your callback returned.
        :rtype: Dict[str, List[Dict[str, Any]]]
        :raises EvaluationException: If ``scenario`` is not a member of ``AdversarialScenario``.

        **Output format**

        .. code-block:: python

            return_value = {
                "jailbreak": [
                {
                    'template_parameters': {},
                    'messages': [
                        {
                            'content': '<jailbreak prompt> <adversarial query>',
                            'role': 'user'
                        },
                        {
                            'content': "<response from endpoint>",
                            'role': 'assistant',
                            'context': None
                        }
                    ],
                    '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
                }],
                "regular": [
                {
                    'template_parameters': {},
                    'messages': [
                    {
                        'content': '<adversarial query>',
                        'role': 'user'
                    },
                    {
                        'content': "<response from endpoint>",
                        'role': 'assistant',
                        'context': None
                    }],
                    '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
                }]
            }
        """
        if scenario not in AdversarialScenario.__members__.values():
            msg = f"Invalid scenario: {scenario}. Supported scenarios: {AdversarialScenario.__members__.values()}"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            )

        # Compare against None rather than truthiness so a caller-supplied seed of 0
        # is kept instead of being silently replaced by a random one.
        if randomization_seed is None:
            randomization_seed = randint(0, 1000000)

        # Baseline run: same scenario and seed, without any jailbreak prompt injected.
        regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
        regular_sim_results = await regular_sim(
            scenario=scenario,
            target=target,
            max_conversation_turns=max_conversation_turns,
            max_simulation_results=max_simulation_results,
            api_call_retry_limit=api_call_retry_limit,
            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
            api_call_delay_sec=api_call_delay_sec,
            concurrent_async_task=concurrent_async_task,
            randomize_order=True,
            randomization_seed=randomization_seed,
        )
        # Jailbreak run: identical settings plus the "upia" jailbreak type.
        jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
        jb_sim_results = await jb_sim(
            scenario=scenario,
            target=target,
            max_conversation_turns=max_conversation_turns,
            max_simulation_results=max_simulation_results,
            api_call_retry_limit=api_call_retry_limit,
            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
            api_call_delay_sec=api_call_delay_sec,
            concurrent_async_task=concurrent_async_task,
            _jailbreak_type="upia",
            randomize_order=True,
            randomization_seed=randomization_seed,
        )
        return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
@@ -0,0 +1,4 @@
1
+ from ._simulator_data_classes import ConversationHistory, Turn
2
+ from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
3
+
4
# Names exported by this package when a caller uses a star import.
+ __all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
@@ -0,0 +1,17 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from azure.ai.evaluation.simulator._constants import SupportedLanguages
5
+
6
# Sentence template; the "__language__" placeholder is replaced per language below.
BASE_SUFFIX = "Make the conversation in __language__ language."

# Maps each SupportedLanguages member to BASE_SUFFIX with the language name filled in.
SUPPORTED_LANGUAGES_MAPPING = {
    language: BASE_SUFFIX.replace("__language__", label)
    for language, label in (
        (SupportedLanguages.English, "english"),
        (SupportedLanguages.Spanish, "spanish"),
        (SupportedLanguages.Italian, "italian"),
        (SupportedLanguages.French, "french"),
        (SupportedLanguages.German, "german"),
        (SupportedLanguages.SimplifiedChinese, "simplified chinese"),
        (SupportedLanguages.Portuguese, "portuguese"),
        (SupportedLanguages.Japanese, "japanese"),
    )
}
@@ -0,0 +1,93 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # pylint: disable=C0103,C0114,C0116
5
+ from dataclasses import dataclass
6
+ from typing import Union
7
+
8
+ from azure.ai.evaluation.simulator._conversation.constants import ConversationRole
9
+
10
+
11
@dataclass
class Turn:
    """
    Represents a conversation turn,
    keeping track of the role, content,
    and context of a turn in a conversation.
    """

    # Speaker of the turn: either a plain string or a ConversationRole member.
    role: Union[str, ConversationRole]
    # Text of the turn.
    content: str
    # Optional context attached to the turn; defaults to None, so the
    # annotation must admit None as well as str.
    context: Union[str, None] = None

    def to_dict(self):
        """
        Convert the conversation turn to a dictionary.

        Returns:
            dict: A dictionary with the keys "role", "content" and "context".
                A ConversationRole role is emitted as its ``.value``; any other
                role is emitted unchanged.
        """
        return {
            "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
            "content": self.content,
            "context": self.context,
        }

    def __repr__(self):
        """
        Return the string representation of the conversation turn.

        Returns:
            str: A string showing the role and content of the turn
                (the context is not included).
        """
        return f"Turn(role={self.role}, content={self.content})"
44
+
45
+
46
class ConversationHistory:
    """
    Conversation history class to keep track of the conversation turns in a conversation.
    """

    def __init__(self):
        """
        Initializes the conversation history with an empty list of turns.
        """
        self.history = []

    def add_to_history(self, turn: "Turn"):
        """
        Adds a turn to the conversation history.

        Args:
            turn (Turn): The conversation turn to add.
        """
        self.history.append(turn)

    def to_list(self):
        """
        Converts the conversation history to a list of dictionaries.

        Returns:
            list: The result of ``to_dict()`` for every stored turn, in insertion order.
        """
        return [turn.to_dict() for turn in self.history]

    def get_length(self):
        """
        Returns the length of the conversation.

        Returns:
            int: The number of turns in the conversation history.
        """
        return len(self.history)

    def __repr__(self):
        """
        Returns the string representation of the conversation history.

        The representation is built and returned without writing to stdout,
        so that repr(), logging and debuggers can use it safely.

        Returns:
            str: One line per turn (``str(turn)``), in insertion order; an
                empty string when the history is empty.
        """
        return "\n".join(str(turn) for turn in self.history)
@@ -0,0 +1,207 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # noqa: E501
5
+ import functools
6
+ import logging
7
+ from typing import Any, Callable, Dict
8
+
9
+ from azure.identity import DefaultAzureCredential
10
+
11
+ from promptflow._sdk._telemetry import ActivityType, monitor_operation
12
+ from azure.ai.evaluation.simulator import AdversarialScenario
13
+ from azure.ai.evaluation._model_configurations import AzureAIProject
14
+
15
+ from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
16
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
17
+ from ._adversarial_simulator import AdversarialSimulator
18
+
19
# Module-level logger, named after this module via __name__.
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def monitor_adversarial_scenario(func) -> Callable:
    """Decorator that reports each call of ``func`` via ``monitor_operation``.

    The monitored wrapper is rebuilt on every invocation so that the custom
    dimensions reflect the keyword arguments of that particular call.

    :param func: The function to be decorated.
    :type func: Callable
    :return: The decorated function.
    :rtype: Callable
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Dimensions come from keyword arguments only; the scenario is
        # stringified, so an absent scenario is reported as "None".
        custom_dimensions = {
            "scenario": str(kwargs.get("scenario", None)),
            "max_conversation_turns": kwargs.get("max_conversation_turns", None),
            "max_simulation_results": kwargs.get("max_simulation_results", None),
        }
        apply_monitoring = monitor_operation(
            activity_name="xpia.adversarial.simulator.call",
            activity_type=ActivityType.PUBLICAPI,
            custom_dimensions=custom_dimensions,
        )
        return apply_monitoring(func)(*args, **kwargs)

    return wrapper
49
+
50
+
51
class IndirectAttackSimulator:
    """
    XPIA (cross domain prompt injected attack) jailbreak adversarial simulator bound to a project scope.

    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
        name.
    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
    :param credential: The credential for connecting to Azure AI project. A ``credential`` entry inside
        ``azure_ai_project`` is used in preference to this argument; when neither is supplied,
        ``DefaultAzureCredential`` is used.
    :type credential: ~azure.core.credentials.TokenCredential
    """

    def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
        """Validate the project scope, pick the credential and create the RAI service helpers.

        :raises EvaluationException: If a required project key is missing or has a falsy value.
        """
        required_keys = ("subscription_id", "resource_group_name", "project_name")

        # NOTE(review): both checks below report ErrorTarget.DIRECT_ATTACK_SIMULATOR even though this
        # is the indirect simulator - confirm whether a dedicated target exists before changing it.
        if any(key not in azure_ai_project for key in required_keys):
            msg = "azure_ai_project must contain keys: subscription_id, resource_group_name and project_name"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
                category=ErrorCategory.MISSING_FIELD,
                blame=ErrorBlame.USER_ERROR,
            )
        if any(not azure_ai_project[key] for key in required_keys):
            msg = "subscription_id, resource_group_name and project_name keys cannot be None"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
                category=ErrorCategory.MISSING_FIELD,
                blame=ErrorBlame.USER_ERROR,
            )

        # A credential stored in the project scope always wins; otherwise keep the
        # caller's credential, falling back to DefaultAzureCredential when it is falsy.
        if "credential" in azure_ai_project:
            credential = azure_ai_project["credential"]
        elif not credential:
            credential = DefaultAzureCredential()

        self.credential = credential
        self.azure_ai_project = azure_ai_project
        self.token_manager = ManagedIdentityAPITokenManager(
            token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
            logger=logging.getLogger("AdversarialSimulator"),
            credential=credential,
        )
        self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
        self.adversarial_template_handler = AdversarialTemplateHandler(
            azure_ai_project=azure_ai_project, rai_client=self.rai_client
        )

    def _ensure_service_dependencies(self):
        """Raise if no RAI client is available.

        :raises EvaluationException: If ``self.rai_client`` is ``None``.
        """
        if self.rai_client is not None:
            return
        msg = "RAI service is required for simulation, but an RAI client was not provided."
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            target=ErrorTarget.ADVERSARIAL_SIMULATOR,
            category=ErrorCategory.MISSING_FIELD,
            blame=ErrorBlame.USER_ERROR,
        )

    # @monitor_adversarial_scenario
    async def __call__(
        self,
        *,
        scenario: AdversarialScenario,
        target: Callable,
        max_conversation_turns: int = 1,
        max_simulation_results: int = 3,
        api_call_retry_limit: int = 3,
        api_call_retry_sleep_sec: int = 1,
        api_call_delay_sec: int = 0,
        concurrent_async_task: int = 3,
    ):
        """
        Runs the XPIA (cross domain prompt injected attack) jailbreak adversarial simulation against a target.
        This simulator converses with your AI system using prompts injected into the context to interrupt normal
        expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside
        the scope of your AI system.

        :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
        :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
        :keyword target: The target function to simulate adversarial inputs against.
            This function should be asynchronous and accept a dictionary representing the adversarial input.
        :paramtype target: Callable
        :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
            Defaults to 1.
        :paramtype max_conversation_turns: int
        :keyword max_simulation_results: The maximum number of simulation results to return.
            Defaults to 3.
        :paramtype max_simulation_results: int
        :keyword api_call_retry_limit: The maximum number of retries for each API call within the simulation.
            Defaults to 3.
        :paramtype api_call_retry_limit: int
        :keyword api_call_retry_sleep_sec: The sleep duration (in seconds) between retries for API calls.
            Defaults to 1 second.
        :paramtype api_call_retry_sleep_sec: int
        :keyword api_call_delay_sec: The delay (in seconds) before making an API call.
            This can be used to avoid hitting rate limits. Defaults to 0 seconds.
        :paramtype api_call_delay_sec: int
        :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
            Defaults to 3.
        :paramtype concurrent_async_task: int
        :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:

            - 'template_parameters': A dictionary with parameters used in the conversation template,
              including 'conversation_starter'.
            - 'messages': A list of dictionaries, each representing a turn in the conversation.
              Each message dictionary includes 'content' (the message text) and
              'role' (indicating whether the message is from the 'user' or the 'assistant').
            - '**$schema**': A string indicating the schema URL for the conversation format.

            The 'content' for 'assistant' role messages may include the messages that your callback returned.
        :rtype: List[Dict[str, Any]]
        :raises EvaluationException: If ``scenario`` is not a member of ``AdversarialScenario``.

        **Output format**

        .. code-block:: python

            return_value = [
                {
                    'template_parameters': {},
                    'messages': [
                        {
                            'content': '<jailbreak prompt> <adversarial query>',
                            'role': 'user'
                        },
                        {
                            'content': "<response from endpoint>",
                            'role': 'assistant',
                            'context': None
                        }
                    ],
                    '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
                }]
        """
        supported_scenarios = AdversarialScenario.__members__.values()
        if scenario not in supported_scenarios:
            msg = f"Invalid scenario: {scenario}. Supported scenarios: {AdversarialScenario.__members__.values()}"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            )

        # Delegate to the generic adversarial simulator, switching it to the "xpia" jailbreak type.
        simulation_options = {
            "scenario": scenario,
            "target": target,
            "max_conversation_turns": max_conversation_turns,
            "max_simulation_results": max_simulation_results,
            "api_call_retry_limit": api_call_retry_limit,
            "api_call_retry_sleep_sec": api_call_retry_sleep_sec,
            "api_call_delay_sec": api_call_delay_sec,
            "concurrent_async_task": concurrent_async_task,
            "_jailbreak_type": "xpia",
        }
        jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
        return await jb_sim(**simulation_options)
@@ -0,0 +1,23 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ """Tooling for model evaluation"""
6
+
7
+ from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager, TokenScope
8
+ from ._proxy_completion_model import ProxyChatCompletionsModel
9
+ from ._rai_client import RAIClient
10
+ from ._template_handler import CONTENT_HARM_TEMPLATES_COLLECTION_KEY, AdversarialTemplateHandler
11
+ from .models import LLMBase, OpenAIChatCompletionsModel
12
+
13
# Names exported by this package when a caller uses a star import.
+ __all__ = [
14
+ "ManagedIdentityAPITokenManager",
15
+ "PlainTokenManager",
16
+ "TokenScope",
17
+ "RAIClient",
18
+ "AdversarialTemplateHandler",
19
+ "CONTENT_HARM_TEMPLATES_COLLECTION_KEY",
20
+ "ProxyChatCompletionsModel",
21
+ "LLMBase",
22
+ "OpenAIChatCompletionsModel",
23
+ ]