azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. azure/ai/evaluation/__init__.py +82 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/_experimental.py +172 -0
  4. azure/ai/evaluation/_common/constants.py +72 -0
  5. azure/ai/evaluation/_common/math.py +89 -0
  6. azure/ai/evaluation/_common/rai_service.py +632 -0
  7. azure/ai/evaluation/_common/utils.py +445 -0
  8. azure/ai/evaluation/_constants.py +72 -0
  9. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
  12. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
  13. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
  14. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  15. azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
  16. azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
  17. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
  18. azure/ai/evaluation/_evaluate/_utils.py +298 -0
  19. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  20. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  21. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
  22. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  23. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
  24. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
  25. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  26. azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
  27. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
  28. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  47. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  48. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  49. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
  50. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  51. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  52. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  53. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  54. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  55. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  56. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  57. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  58. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  59. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
  60. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
  62. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
  64. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
  65. azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
  66. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
  67. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  68. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  69. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  70. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  72. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  73. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
  74. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
  75. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  76. azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
  77. azure/ai/evaluation/_exceptions.py +128 -0
  78. azure/ai/evaluation/_http_utils.py +466 -0
  79. azure/ai/evaluation/_model_configurations.py +123 -0
  80. azure/ai/evaluation/_user_agent.py +6 -0
  81. azure/ai/evaluation/_vendor/__init__.py +3 -0
  82. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  83. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  84. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  85. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  86. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  87. azure/ai/evaluation/_version.py +5 -0
  88. azure/ai/evaluation/py.typed +0 -0
  89. azure/ai/evaluation/simulator/__init__.py +16 -0
  90. azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
  91. azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
  92. azure/ai/evaluation/simulator/_constants.py +27 -0
  93. azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
  94. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  95. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  96. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  97. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  98. azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
  99. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  100. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  101. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
  102. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
  103. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  104. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
  105. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
  106. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
  107. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
  108. azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
  109. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  110. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
  111. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
  112. azure/ai/evaluation/simulator/_simulator.py +716 -0
  113. azure/ai/evaluation/simulator/_tracing.py +89 -0
  114. azure/ai/evaluation/simulator/_utils.py +132 -0
  115. azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
  116. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
  117. azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
  118. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
  119. azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
  120. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  121. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  122. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,471 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # noqa: E501
5
+ # pylint: disable=E0401,E0611
6
+ import asyncio
7
+ import logging
8
+ import random
9
+ from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
10
+ from itertools import zip_longest
11
+
12
+ from tqdm import tqdm
13
+
14
+ from azure.ai.evaluation._common._experimental import experimental
15
+ from azure.ai.evaluation._common.utils import validate_azure_ai_project
16
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
+ from azure.ai.evaluation._http_utils import get_async_http_client
18
+ from azure.ai.evaluation._model_configurations import AzureAIProject
19
+ from azure.ai.evaluation.simulator import AdversarialScenario
20
+ from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
21
+ from azure.core.credentials import TokenCredential
22
+ from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
23
+
24
+ from ._constants import SupportedLanguages
25
+ from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
26
+ from ._conversation._conversation import simulate_conversation
27
+ from ._model_tools import (
28
+ AdversarialTemplateHandler,
29
+ ManagedIdentityAPITokenManager,
30
+ ProxyChatCompletionsModel,
31
+ RAIClient,
32
+ TokenScope,
33
+ )
34
+ from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
35
+ from ._utils import JsonLineList
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
+ @experimental
41
+ class AdversarialSimulator:
42
+ """
43
+ Initializes the adversarial simulator with a project scope.
44
+
45
+ :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
46
+ name.
47
+ :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
48
+ :param credential: The credential for connecting to Azure AI project.
49
+ :type credential: ~azure.core.credentials.TokenCredential
50
+
51
+ .. admonition:: Example:
52
+
53
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
54
+ :start-after: [START adversarial_scenario]
55
+ :end-before: [END adversarial_scenario]
56
+ :language: python
57
+ :dedent: 8
58
+ :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
59
+ 2 conversation turns each (4 messages per result).
60
+ """
61
+
62
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
    """Validate the project scope and build the RAI service helpers.

    :keyword azure_ai_project: The scope of the Azure AI project.
    :paramtype azure_ai_project: ~azure.ai.evaluation.AzureAIProject
    :keyword credential: The credential handed to the token manager.
    :paramtype credential: ~azure.core.credentials.TokenCredential
    :raises EvaluationException: If project validation fails. The original error is re-raised
        with its target set to the adversarial simulator.
    """
    try:
        validated_project = validate_azure_ai_project(azure_ai_project)
    except EvaluationException as validation_error:
        # Keep the message, category and blame of the validation failure, but attribute it
        # to this simulator so callers can tell where it surfaced.
        raise EvaluationException(
            message=validation_error.message,
            internal_message=validation_error.internal_message,
            target=ErrorTarget.ADVERSARIAL_SIMULATOR,
            category=validation_error.category,
            blame=validation_error.blame,
        ) from validation_error

    self.azure_ai_project = validated_project

    simulator_logger = logging.getLogger("AdversarialSimulator")
    token_credential = cast(TokenCredential, credential)
    self.token_manager = ManagedIdentityAPITokenManager(
        token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
        logger=simulator_logger,
        credential=token_credential,
    )

    # The template handler needs the same project scope and the RAI client built from it.
    self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
    self.adversarial_template_handler = AdversarialTemplateHandler(
        azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
    )
85
+
86
+ def _ensure_service_dependencies(self):
87
+ if self.rai_client is None:
88
+ msg = "RAI service is required for simulation, but an RAI client was not provided."
89
+ raise EvaluationException(
90
+ message=msg,
91
+ internal_message=msg,
92
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
93
+ category=ErrorCategory.MISSING_FIELD,
94
+ blame=ErrorBlame.USER_ERROR,
95
+ )
96
+
97
+ # pylint: disable=too-many-locals
98
async def __call__(
    self,
    *,
    # Note: the scenario input also accepts inputs from _UnstableAdversarialScenario, but that's
    # not stated since those values are nominally for internal use only.
    scenario: AdversarialScenario,
    target: Callable,
    max_conversation_turns: int = 1,
    max_simulation_results: int = 3,
    api_call_retry_limit: int = 3,
    api_call_retry_sleep_sec: int = 1,
    api_call_delay_sec: int = 0,
    concurrent_async_task: int = 3,
    language: SupportedLanguages = SupportedLanguages.English,
    randomize_order: bool = True,
    randomization_seed: Optional[int] = None,
    **kwargs,
):
    """
    Executes the adversarial simulation against a specified target function asynchronously.

    :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
        example:

        - :py:const:`azure.ai.evaluation.simulator.AdversarialScenario.ADVERSARIAL_QA`
        - :py:const:`azure.ai.evaluation.simulator.AdversarialScenario.ADVERSARIAL_CONVERSATION`
    :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
    :keyword target: The target function to simulate adversarial inputs against.
        This function should be asynchronous and accept a dictionary representing the adversarial input.
    :paramtype target: Callable
    :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
        Only honoured for the ADVERSARIAL_CONVERSATION scenario; every other scenario runs a single
        user/assistant exchange. Defaults to 1.
    :paramtype max_conversation_turns: int
    :keyword max_simulation_results: The maximum number of simulation results to return.
        Defaults to 3.
    :paramtype max_simulation_results: int
    :keyword api_call_retry_limit: The maximum number of retries for each API call within the simulation.
        Defaults to 3.
    :paramtype api_call_retry_limit: int
    :keyword api_call_retry_sleep_sec: The sleep duration (in seconds) between retries for API calls.
        Defaults to 1 second.
    :paramtype api_call_retry_sleep_sec: int
    :keyword api_call_delay_sec: The delay (in seconds) before making an API call.
        This can be used to avoid hitting rate limits. Defaults to 0 seconds.
    :paramtype api_call_delay_sec: int
    :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
        Values above 1000 are capped at 1000. Defaults to 3.
    :paramtype concurrent_async_task: int
    :keyword language: The language in which the conversation should be generated. Defaults to English.
    :paramtype language: azure.ai.evaluation.simulator.SupportedLanguages
    :keyword randomize_order: Whether or not the order of the prompts should be randomized. Defaults to True.
    :paramtype randomize_order: bool
    :keyword randomization_seed: The seed used to randomize prompt selection. If unset, the system's
        default seed is used. Defaults to None.
    :paramtype randomization_seed: Optional[int]
    :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:

        - 'template_parameters': A dictionary with parameters used in the conversation template,
          including 'conversation_starter'.
        - 'messages': A list of dictionaries, each representing a turn in the conversation.
          Each message dictionary includes 'content' (the message text) and
          'role' (indicating whether the message is from the 'user' or the 'assistant').
        - '**$schema**': A string indicating the schema URL for the conversation format.

        The 'content' for 'assistant' role messages may include the messages that your callback returned.
    :rtype: List[Dict[str, Any]]
    :raises EvaluationException: If ``scenario`` is not a supported scenario, or if no RAI client
        is available.
    """

    # Validate the scenario first so nothing below acts on an unsupported value.
    if not (
        scenario in AdversarialScenario.__members__.values()
        or scenario in _UnstableAdversarialScenario.__members__.values()
    ):
        msg = f"Invalid scenario: {scenario}. Supported scenarios are: {AdversarialScenario.__members__.values()}"
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            target=ErrorTarget.ADVERSARIAL_SIMULATOR,
            category=ErrorCategory.INVALID_VALUE,
            blame=ErrorBlame.USER_ERROR,
        )

    # The turn limit handed to the conversation loop counts individual messages, so one
    # user/assistant exchange is 2. Only the conversation scenario is multi-turn.
    if scenario != AdversarialScenario.ADVERSARIAL_CONVERSATION:
        max_conversation_turns = 2
    else:
        max_conversation_turns = max_conversation_turns * 2

    self._ensure_service_dependencies()
    templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
    concurrent_async_task = min(concurrent_async_task, 1000)
    semaphore = asyncio.Semaphore(concurrent_async_task)
    sim_results = []
    tasks = []
    total_tasks = sum(len(t.template_parameters) for t in templates)
    if max_simulation_results > total_tasks:
        logger.warning(
            "Cannot provide %s results due to maximum number of adversarial simulations that can be generated: %s."
            "\n %s simulations will be generated.",
            max_simulation_results,
            total_tasks,
            total_tasks,
        )
    total_tasks = min(total_tasks, max_simulation_results)
    _jailbreak_type = kwargs.get("_jailbreak_type", None)
    if _jailbreak_type:
        jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
    progress_bar = tqdm(
        total=total_tasks,
        desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
        ncols=100,
        unit="simulations",
    )

    try:
        if randomize_order:
            # The template parameter lists are persistent across sim runs within a session,
            # so randomize the selection instead of shuffling the parameter lists directly
            # or making a potentially large deep copy.
            if randomization_seed is not None:
                random.seed(randomization_seed)
            random.shuffle(templates)

        # Interleave the templates: take the first parameter set of every template, then the
        # second of every template, and so on, until enough simulations are scheduled.
        parameter_lists = [t.template_parameters for t in templates]
        for param_group in zip_longest(*parameter_lists):
            for template, parameter in zip(templates, param_group):
                if parameter is None:
                    # zip_longest pads shorter parameter lists with None; there is nothing
                    # to simulate for this template in this round.
                    continue
                if _jailbreak_type == "upia":
                    parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
                tasks.append(
                    asyncio.create_task(
                        self._simulate_async(
                            target=target,
                            template=template,
                            parameters=parameter,
                            max_conversation_turns=max_conversation_turns,
                            api_call_retry_limit=api_call_retry_limit,
                            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
                            api_call_delay_sec=api_call_delay_sec,
                            language=language,
                            semaphore=semaphore,
                        )
                    )
                )
                if len(tasks) >= max_simulation_results:
                    break
            if len(tasks) >= max_simulation_results:
                break

        for task in asyncio.as_completed(tasks):
            sim_results.append(await task)
            progress_bar.update(1)
    finally:
        # If a simulation failed, stop the ones still in flight instead of leaving them
        # running in the background. On success every task is already done, so this is a no-op.
        for scheduled in tasks:
            if not scheduled.done():
                scheduled.cancel()
        progress_bar.close()

    return JsonLineList(sim_results)
247
+
248
+ def _to_chat_protocol(
249
+ self,
250
+ *,
251
+ conversation_history: List[ConversationTurn],
252
+ template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
253
+ ):
254
+ if template_parameters is None:
255
+ template_parameters = {}
256
+ messages = []
257
+ for _, m in enumerate(conversation_history):
258
+ message = {"content": m.message, "role": m.role.value}
259
+ if m.full_response is not None and "context" in m.full_response:
260
+ message["context"] = m.full_response["context"]
261
+ messages.append(message)
262
+ conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
263
+ template_parameters["metadata"] = {}
264
+ for key in (
265
+ "conversation_starter",
266
+ "group_of_people",
267
+ "target_population",
268
+ "topic",
269
+ "ch_template_placeholder",
270
+ "chatbot_name",
271
+ "name",
272
+ "group",
273
+ ):
274
+ template_parameters.pop(key, None)
275
+ if conversation_category:
276
+ template_parameters["category"] = conversation_category
277
+ return {
278
+ "template_parameters": template_parameters,
279
+ "messages": messages,
280
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
281
+ }
282
+
283
async def _simulate_async(
    self,
    *,
    target: Callable,
    template: AdversarialTemplate,
    parameters: TemplateParameters,
    max_conversation_turns: int,
    api_call_retry_limit: int,
    api_call_retry_sleep_sec: int,
    api_call_delay_sec: int,
    language: SupportedLanguages,
    semaphore: asyncio.Semaphore,
) -> Dict[str, Any]:
    """Run one simulated conversation and return it in chat-protocol form.

    :keyword target: The callback that plays the assistant role.
    :paramtype target: Callable
    :keyword template: The adversarial template driving the simulated user.
    :paramtype template: AdversarialTemplate
    :keyword parameters: The parameters used to instantiate the template.
    :paramtype parameters: TemplateParameters
    :keyword max_conversation_turns: Passed to ``simulate_conversation`` as its turn limit.
    :paramtype max_conversation_turns: int
    :keyword api_call_retry_limit: Total number of retries configured on the HTTP retry policy.
    :paramtype api_call_retry_limit: int
    :keyword api_call_retry_sleep_sec: Backoff factor for the fixed-mode retry policy.
    :paramtype api_call_retry_sleep_sec: int
    :keyword api_call_delay_sec: Delay forwarded to ``simulate_conversation``.
    :paramtype api_call_delay_sec: int
    :keyword language: The language forwarded to ``simulate_conversation``.
    :paramtype language: SupportedLanguages
    :keyword semaphore: Held for the duration of the conversation to bound concurrency.
    :paramtype semaphore: asyncio.Semaphore
    :return: The single result dictionary built by ``_to_chat_protocol``, not a list.
    :rtype: Dict[str, Any]
    """
    user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
    system_bot = self._setup_bot(
        target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
    )
    bots = [user_bot, system_bot]
    session = get_async_http_client().with_policies(
        retry_policy=AsyncRetryPolicy(
            retry_total=api_call_retry_limit,
            retry_backoff_factor=api_call_retry_sleep_sec,
            retry_mode=RetryMode.Fixed,
        )
    )

    # The semaphore caps how many conversations run at once; the session is closed as soon
    # as this conversation has finished.
    async with semaphore, session:
        _, conversation_history = await simulate_conversation(
            bots=bots,
            session=session,
            turn_limit=max_conversation_turns,
            api_call_delay_sec=api_call_delay_sec,
            language=language,
        )

    return self._to_chat_protocol(
        conversation_history=conversation_history,
        template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
    )
322
+
323
def _get_user_proxy_completion_model(
    self, template_key: str, template_parameters: TemplateParameters
) -> ProxyChatCompletionsModel:
    """Build the proxy chat-completions model that plays the simulated user.

    :param template_key: The key of the template the model should use.
    :type template_key: str
    :param template_parameters: The parameters passed to the model for that template.
    :type template_parameters: TemplateParameters
    :return: A model configured with the RAI client's simulation submit endpoint and this
        simulator's token manager.
    :rtype: ProxyChatCompletionsModel
    """
    proxy_model_settings = {
        "name": "raisvc_proxy_model",
        "template_key": template_key,
        "template_parameters": template_parameters,
        "endpoint_url": self.rai_client.simulation_submit_endpoint,
        "token_manager": self.token_manager,
        "api_version": "2023-07-01-preview",
        "max_tokens": 1200,
        "temperature": 0.0,
    }
    return ProxyChatCompletionsModel(**proxy_model_settings)
336
+
337
def _setup_bot(
    self,
    *,
    role: ConversationRole,
    template: AdversarialTemplate,
    parameters: TemplateParameters,
    target: Optional[Callable] = None,
) -> ConversationBot:
    """Create the conversation bot for one side of the simulated conversation.

    :keyword role: Which side to build. USER yields a bot backed by the proxy completion model;
        ASSISTANT yields a bot that forwards to ``target``.
    :paramtype role: ConversationRole
    :keyword template: The adversarial template for the conversation.
    :paramtype template: AdversarialTemplate
    :keyword parameters: The parameters used with the template.
    :paramtype parameters: TemplateParameters
    :keyword target: The callback for the assistant side. Required when ``role`` is ASSISTANT.
    :paramtype target: Optional[Callable]
    :return: The configured bot.
    :rtype: ConversationBot
    :raises EvaluationException: If ``role`` is ASSISTANT and ``target`` is None, or if ``role``
        is neither USER nor ASSISTANT.
    """
    if role is ConversationRole.USER:
        model = self._get_user_proxy_completion_model(
            template_key=template.template_name, template_parameters=parameters
        )
        return ConversationBot(
            role=role,
            model=model,
            conversation_template=str(template),
            instantiation_parameters=parameters,
        )

    if role is ConversationRole.ASSISTANT:
        if target is None:
            msg = "Cannot setup system bot. Target is None"

            # Use the `category` keyword, as every other EvaluationException raised in this
            # file does; this call previously passed `error_category`.
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.ADVERSARIAL_SIMULATOR,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.SYSTEM_ERROR,
            )

        class DummyModel:
            """Placeholder passed as the bot's model; the callback bot is driven by ``target``."""

            def __init__(self):
                self.name = "dummy_model"

            def __call__(self) -> None:
                pass

        return CallbackConversationBot(
            callback=target,
            role=role,
            model=DummyModel(),
            user_template=str(template),
            user_template_parameters=parameters,
            conversation_template="",
            instantiation_parameters={},
        )

    msg = "Invalid value for enum ConversationRole. This should never happen."
    raise EvaluationException(
        message=msg,
        internal_message=msg,
        target=ErrorTarget.ADVERSARIAL_SIMULATOR,
        category=ErrorCategory.INVALID_VALUE,
        blame=ErrorBlame.SYSTEM_ERROR,
    )
393
+
394
+ def _join_conversation_starter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
395
+ key: Literal["conversation_starter"] = "conversation_starter"
396
+ if key in parameters.keys():
397
+ parameters[key] = f"{to_join} {parameters[key]}"
398
+ else:
399
+ parameters[key] = to_join
400
+
401
+ return parameters
402
+
403
def call_sync(
    self,
    *,
    scenario: AdversarialScenario,
    max_conversation_turns: int,
    max_simulation_results: int,
    target: Callable,
    api_call_retry_limit: int,
    api_call_retry_sleep_sec: int,
    api_call_delay_sec: int,
    concurrent_async_task: int,
) -> List[Dict[str, Any]]:
    """Call the adversarial simulator synchronously.

    Blocks until the simulation has finished. If an event loop is already running in the calling
    thread, the simulation runs in a worker thread with its own event loop; the calling thread
    (and therefore that running loop) is blocked until the result is available.

    :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
        example:

        - :py:const:`azure.ai.evaluation.simulator.adversarial_scenario.AdversarialScenario.ADVERSARIAL_QA`
        - :py:const:`azure.ai.evaluation.simulator.adversarial_scenario.AdversarialScenario.ADVERSARIAL_CONVERSATION`
    :paramtype scenario: azure.ai.evaluation.simulator.adversarial_scenario.AdversarialScenario

    :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
    :paramtype max_conversation_turns: int
    :keyword max_simulation_results: The maximum number of simulation results to return.
    :paramtype max_simulation_results: int
    :keyword target: The target function to simulate adversarial inputs against.
    :paramtype target: Callable
    :keyword api_call_retry_limit: The maximum number of retries for each API call within the simulation.
    :paramtype api_call_retry_limit: int
    :keyword api_call_retry_sleep_sec: The sleep duration (in seconds) between retries for API calls.
    :paramtype api_call_retry_sleep_sec: int
    :keyword api_call_delay_sec: The delay (in seconds) before making an API call.
    :paramtype api_call_delay_sec: int
    :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
    :paramtype concurrent_async_task: int
    :return: A list of dictionaries, each representing a simulated conversation.
    :rtype: List[Dict[str, Any]]
    """
    call_kwargs = {
        "scenario": scenario,
        "max_conversation_turns": max_conversation_turns,
        "max_simulation_results": max_simulation_results,
        "target": target,
        "api_call_retry_limit": api_call_retry_limit,
        "api_call_retry_sleep_sec": api_call_retry_sleep_sec,
        "api_call_delay_sec": api_call_delay_sec,
        "concurrent_async_task": concurrent_async_task,
    }

    def _run_in_fresh_loop():
        # asyncio.run creates, runs and closes an event loop owned by the current thread.
        return asyncio.run(self(**call_kwargs))

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread, so one can be started here.
        return _run_in_fresh_loop()

    # A loop is already running in this thread. Neither run_until_complete nor asyncio.run
    # may be used on it (both raise RuntimeError), so run the simulation on a separate
    # thread with its own loop and wait for the result.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(_run_in_fresh_loop).result()
@@ -0,0 +1,27 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from enum import Enum
5
+
6
+
7
class SupportedLanguages(Enum):
    """Languages a simulation can be generated in.

    Each member's value is the language code string for that language.

    .. admonition:: Example:

        .. literalinclude:: ../samples/evaluation_samples_simulate.py
            :start-after: [START supported_languages]
            :end-before: [END supported_languages]
            :language: python
            :dedent: 8
            :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
    """

    # Member order is part of the enum's iteration order; keep it stable.
    Spanish = "es"
    Italian = "it"
    French = "fr"
    German = "de"
    SimplifiedChinese = "zh-cn"
    Portuguese = "pt"
    Japanese = "ja"
    English = "en"