azure-ai-evaluation 1.0.0__py3-none-any.whl → 1.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (105)
  1. azure/ai/evaluation/__init__.py +5 -31
  2. azure/ai/evaluation/_common/constants.py +2 -9
  3. azure/ai/evaluation/_common/rai_service.py +120 -300
  4. azure/ai/evaluation/_common/utils.py +23 -381
  5. azure/ai/evaluation/_constants.py +6 -19
  6. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/__init__.py +2 -3
  7. azure/ai/evaluation/_evaluate/{_batch_run/eval_run_context.py → _batch_run_client/batch_run_context.py} +7 -23
  8. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py +17 -33
  9. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py +4 -32
  10. azure/ai/evaluation/_evaluate/_eval_run.py +24 -81
  11. azure/ai/evaluation/_evaluate/_evaluate.py +239 -393
  12. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +17 -17
  13. azure/ai/evaluation/_evaluate/_utils.py +28 -82
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +18 -17
  15. azure/ai/evaluation/_evaluators/{_retrieval → _chat}/__init__.py +2 -2
  16. azure/ai/evaluation/_evaluators/_chat/_chat.py +357 -0
  17. azure/ai/evaluation/_evaluators/{_service_groundedness → _chat/retrieval}/__init__.py +2 -2
  18. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +157 -0
  19. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
  20. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +88 -78
  21. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +39 -76
  22. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +4 -0
  23. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +67 -105
  24. azure/ai/evaluation/_evaluators/{_multimodal/_content_safety_multimodal_base.py → _content_safety/_content_safety_base.py} +34 -24
  25. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +301 -0
  26. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +54 -105
  27. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +52 -99
  28. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +52 -101
  29. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +51 -101
  30. azure/ai/evaluation/_evaluators/_eci/_eci.py +54 -44
  31. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +19 -34
  32. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +89 -76
  33. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +41 -66
  34. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +16 -14
  35. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +87 -113
  36. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
  37. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +27 -20
  38. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +80 -89
  39. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
  40. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
  41. azure/ai/evaluation/_evaluators/_qa/_qa.py +30 -23
  42. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +96 -84
  43. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +47 -78
  44. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +27 -26
  45. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +38 -53
  46. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +5 -0
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +105 -91
  48. azure/ai/evaluation/_exceptions.py +7 -28
  49. azure/ai/evaluation/_http_utils.py +132 -203
  50. azure/ai/evaluation/_model_configurations.py +8 -104
  51. azure/ai/evaluation/_version.py +1 -1
  52. azure/ai/evaluation/simulator/__init__.py +1 -2
  53. azure/ai/evaluation/simulator/_adversarial_scenario.py +1 -20
  54. azure/ai/evaluation/simulator/_adversarial_simulator.py +92 -111
  55. azure/ai/evaluation/simulator/_constants.py +1 -11
  56. azure/ai/evaluation/simulator/_conversation/__init__.py +12 -13
  57. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
  58. azure/ai/evaluation/simulator/_direct_attack_simulator.py +67 -33
  59. azure/ai/evaluation/simulator/_helpers/__init__.py +2 -1
  60. azure/ai/evaluation/{_common → simulator/_helpers}/_experimental.py +9 -24
  61. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +5 -26
  62. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +94 -107
  63. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +22 -70
  64. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +11 -28
  65. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +4 -8
  66. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +24 -68
  67. azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
  68. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +10 -6
  69. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +5 -6
  70. azure/ai/evaluation/simulator/_simulator.py +207 -277
  71. azure/ai/evaluation/simulator/_tracing.py +4 -4
  72. azure/ai/evaluation/simulator/_utils.py +13 -31
  73. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +449 -0
  74. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +99 -0
  75. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +1 -1
  76. azure/ai/evaluation/_common/math.py +0 -89
  77. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -46
  78. azure/ai/evaluation/_evaluators/_common/__init__.py +0 -13
  79. azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -344
  80. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -88
  81. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -133
  82. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -113
  83. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -99
  84. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  85. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  86. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  87. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  88. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  89. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  90. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  91. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -112
  92. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -93
  93. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -148
  94. azure/ai/evaluation/_vendor/__init__.py +0 -3
  95. azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -14
  96. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -328
  97. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -63
  98. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -63
  99. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -53
  100. azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  101. azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -1150
  102. azure_ai_evaluation-1.0.0.dist-info/METADATA +0 -595
  103. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +0 -70
  104. azure_ai_evaluation-1.0.0.dist-info/RECORD +0 -119
  105. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0
@@ -6,23 +6,20 @@
6
6
  import asyncio
7
7
  import logging
8
8
  import random
9
- from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
10
- from itertools import zip_longest
9
+ from typing import Any, Callable, Dict, List, Optional
11
10
 
12
11
  from tqdm import tqdm
13
12
 
14
- from azure.ai.evaluation._common._experimental import experimental
15
- from azure.ai.evaluation._common.utils import validate_azure_ai_project
16
13
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
14
  from azure.ai.evaluation._http_utils import get_async_http_client
18
15
  from azure.ai.evaluation._model_configurations import AzureAIProject
19
16
  from azure.ai.evaluation.simulator import AdversarialScenario
20
17
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
21
- from azure.core.credentials import TokenCredential
22
18
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
19
+ from azure.identity import DefaultAzureCredential
23
20
 
24
21
  from ._constants import SupportedLanguages
25
- from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
22
+ from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
26
23
  from ._conversation._conversation import simulate_conversation
27
24
  from ._model_tools import (
28
25
  AdversarialTemplateHandler,
@@ -31,13 +28,11 @@ from ._model_tools import (
31
28
  RAIClient,
32
29
  TokenScope,
33
30
  )
34
- from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
35
31
  from ._utils import JsonLineList
36
32
 
37
33
  logger = logging.getLogger(__name__)
38
34
 
39
35
 
40
- @experimental
41
36
  class AdversarialSimulator:
42
37
  """
43
38
  Initializes the adversarial simulator with a project scope.
@@ -47,40 +42,43 @@ class AdversarialSimulator:
47
42
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
48
43
  :param credential: The credential for connecting to Azure AI project.
49
44
  :type credential: ~azure.core.credentials.TokenCredential
50
-
51
- .. admonition:: Example:
52
-
53
- .. literalinclude:: ../samples/evaluation_samples_simulate.py
54
- :start-after: [START adversarial_scenario]
55
- :end-before: [END adversarial_scenario]
56
- :language: python
57
- :dedent: 8
58
- :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
59
- 2 conversation turns each (4 messages per result).
60
45
  """
61
46
 
62
- def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
47
+ def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
63
48
  """Constructor."""
64
-
65
- try:
66
- self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
67
- except EvaluationException as e:
49
+ # check if azure_ai_project has the keys: subscription_id, resource_group_name and project_name
50
+ if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
51
+ msg = "azure_ai_project must contain keys: subscription_id, resource_group_name, project_name"
68
52
  raise EvaluationException(
69
- message=e.message,
70
- internal_message=e.internal_message,
53
+ message=msg,
54
+ internal_message=msg,
71
55
  target=ErrorTarget.ADVERSARIAL_SIMULATOR,
72
- category=e.category,
73
- blame=e.blame,
74
- ) from e
75
-
56
+ category=ErrorCategory.MISSING_FIELD,
57
+ blame=ErrorBlame.USER_ERROR,
58
+ )
59
+ # check the value of the keys in azure_ai_project is not none
60
+ if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
61
+ msg = "subscription_id, resource_group_name and project_name cannot be None"
62
+ raise EvaluationException(
63
+ message=msg,
64
+ internal_message=msg,
65
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
66
+ category=ErrorCategory.MISSING_FIELD,
67
+ blame=ErrorBlame.USER_ERROR,
68
+ )
69
+ if "credential" not in azure_ai_project and not credential:
70
+ credential = DefaultAzureCredential()
71
+ elif "credential" in azure_ai_project:
72
+ credential = azure_ai_project["credential"]
73
+ self.azure_ai_project = azure_ai_project
76
74
  self.token_manager = ManagedIdentityAPITokenManager(
77
75
  token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
78
76
  logger=logging.getLogger("AdversarialSimulator"),
79
- credential=cast(TokenCredential, credential),
77
+ credential=credential,
80
78
  )
81
- self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
79
+ self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
82
80
  self.adversarial_template_handler = AdversarialTemplateHandler(
83
- azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
81
+ azure_ai_project=azure_ai_project, rai_client=self.rai_client
84
82
  )
85
83
 
86
84
  def _ensure_service_dependencies(self):
@@ -94,7 +92,7 @@ class AdversarialSimulator:
94
92
  blame=ErrorBlame.USER_ERROR,
95
93
  )
96
94
 
97
- # pylint: disable=too-many-locals
95
+ # @monitor_adversarial_scenario
98
96
  async def __call__(
99
97
  self,
100
98
  *,
@@ -108,10 +106,10 @@ class AdversarialSimulator:
108
106
  api_call_retry_sleep_sec: int = 1,
109
107
  api_call_delay_sec: int = 0,
110
108
  concurrent_async_task: int = 3,
109
+ _jailbreak_type: Optional[str] = None,
111
110
  language: SupportedLanguages = SupportedLanguages.English,
112
111
  randomize_order: bool = True,
113
112
  randomization_seed: Optional[int] = None,
114
- **kwargs,
115
113
  ):
116
114
  """
117
115
  Executes the adversarial simulation against a specified target function asynchronously.
@@ -161,6 +159,28 @@ class AdversarialSimulator:
161
159
 
162
160
  The 'content' for 'assistant' role messages may includes the messages that your callback returned.
163
161
  :rtype: List[Dict[str, Any]]
162
+
163
+ **Output format**
164
+
165
+ .. code-block:: python
166
+
167
+ return_value = [
168
+ {
169
+ 'template_parameters': {},
170
+ 'messages': [
171
+ {
172
+ 'content': '<jailbreak prompt> <adversarial query>',
173
+ 'role': 'user'
174
+ },
175
+ {
176
+ 'content': "<response from endpoint>",
177
+ 'role': 'assistant',
178
+ 'context': None
179
+ }
180
+ ],
181
+ '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
182
+ }
183
+ ]
164
184
  """
165
185
 
166
186
  # validate the inputs
@@ -196,7 +216,6 @@ class AdversarialSimulator:
196
216
  total_tasks,
197
217
  )
198
218
  total_tasks = min(total_tasks, max_simulation_results)
199
- _jailbreak_type = kwargs.get("_jailbreak_type", None)
200
219
  if _jailbreak_type:
201
220
  jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
202
221
  progress_bar = tqdm(
@@ -205,18 +224,17 @@ class AdversarialSimulator:
205
224
  ncols=100,
206
225
  unit="simulations",
207
226
  )
208
-
209
- if randomize_order:
210
- # The template parameter lists are persistent across sim runs within a session,
211
- # So randomize a the selection instead of the parameter list directly,
212
- # or a potentially large deep copy.
213
- if randomization_seed is not None:
214
- random.seed(randomization_seed)
215
- random.shuffle(templates)
216
- parameter_lists = [t.template_parameters for t in templates]
217
- zipped_parameters = list(zip_longest(*parameter_lists))
218
- for param_group in zipped_parameters:
219
- for template, parameter in zip(templates, param_group):
227
+ for template in templates:
228
+ parameter_order = list(range(len(template.template_parameters)))
229
+ if randomize_order:
230
+ # The template parameter lists are persistent across sim runs within a session,
231
+ # So randomize a the selection instead of the parameter list directly,
232
+ # or a potentially large deep copy.
233
+ if randomization_seed is not None:
234
+ random.seed(randomization_seed)
235
+ random.shuffle(parameter_order)
236
+ for index in parameter_order:
237
+ parameter = template.template_parameters[index].copy()
220
238
  if _jailbreak_type == "upia":
221
239
  parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
222
240
  tasks.append(
@@ -245,21 +263,16 @@ class AdversarialSimulator:
245
263
 
246
264
  return JsonLineList(sim_results)
247
265
 
248
- def _to_chat_protocol(
249
- self,
250
- *,
251
- conversation_history: List[ConversationTurn],
252
- template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
253
- ):
266
+ def _to_chat_protocol(self, *, conversation_history, template_parameters: Dict = None):
254
267
  if template_parameters is None:
255
268
  template_parameters = {}
256
269
  messages = []
257
270
  for _, m in enumerate(conversation_history):
258
271
  message = {"content": m.message, "role": m.role.value}
259
- if m.full_response is not None and "context" in m.full_response:
272
+ if "context" in m.full_response:
260
273
  message["context"] = m.full_response["context"]
261
274
  messages.append(message)
262
- conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
275
+ conversation_category = template_parameters.pop("metadata", {}).get("Category")
263
276
  template_parameters["metadata"] = {}
264
277
  for key in (
265
278
  "conversation_starter",
@@ -267,9 +280,6 @@ class AdversarialSimulator:
267
280
  "target_population",
268
281
  "topic",
269
282
  "ch_template_placeholder",
270
- "chatbot_name",
271
- "name",
272
- "group",
273
283
  ):
274
284
  template_parameters.pop(key, None)
275
285
  if conversation_category:
@@ -284,14 +294,14 @@ class AdversarialSimulator:
284
294
  self,
285
295
  *,
286
296
  target: Callable,
287
- template: AdversarialTemplate,
288
- parameters: TemplateParameters,
289
- max_conversation_turns: int,
290
- api_call_retry_limit: int,
291
- api_call_retry_sleep_sec: int,
292
- api_call_delay_sec: int,
293
- language: SupportedLanguages,
294
- semaphore: asyncio.Semaphore,
297
+ template,
298
+ parameters,
299
+ max_conversation_turns,
300
+ api_call_retry_limit,
301
+ api_call_retry_sleep_sec,
302
+ api_call_delay_sec,
303
+ language,
304
+ semaphore,
295
305
  ) -> List[Dict]:
296
306
  user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
297
307
  system_bot = self._setup_bot(
@@ -314,15 +324,9 @@ class AdversarialSimulator:
314
324
  api_call_delay_sec=api_call_delay_sec,
315
325
  language=language,
316
326
  )
327
+ return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)
317
328
 
318
- return self._to_chat_protocol(
319
- conversation_history=conversation_history,
320
- template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
321
- )
322
-
323
- def _get_user_proxy_completion_model(
324
- self, template_key: str, template_parameters: TemplateParameters
325
- ) -> ProxyChatCompletionsModel:
329
+ def _get_user_proxy_completion_model(self, template_key, template_parameters):
326
330
  return ProxyChatCompletionsModel(
327
331
  name="raisvc_proxy_model",
328
332
  template_key=template_key,
@@ -334,15 +338,8 @@ class AdversarialSimulator:
334
338
  temperature=0.0,
335
339
  )
336
340
 
337
- def _setup_bot(
338
- self,
339
- *,
340
- role: ConversationRole,
341
- template: AdversarialTemplate,
342
- parameters: TemplateParameters,
343
- target: Optional[Callable] = None,
344
- ) -> ConversationBot:
345
- if role is ConversationRole.USER:
341
+ def _setup_bot(self, *, role, template, parameters, target: Callable = None):
342
+ if role == ConversationRole.USER:
346
343
  model = self._get_user_proxy_completion_model(
347
344
  template_key=template.template_name, template_parameters=parameters
348
345
  )
@@ -353,46 +350,30 @@ class AdversarialSimulator:
353
350
  instantiation_parameters=parameters,
354
351
  )
355
352
 
356
- if role is ConversationRole.ASSISTANT:
357
- if target is None:
358
- msg = "Cannot setup system bot. Target is None"
353
+ if role == ConversationRole.ASSISTANT:
359
354
 
360
- raise EvaluationException(
361
- message=msg,
362
- internal_message=msg,
363
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
364
- error_category=ErrorCategory.INVALID_VALUE,
365
- blame=ErrorBlame.SYSTEM_ERROR,
366
- )
367
-
368
- class DummyModel:
369
- def __init__(self):
370
- self.name = "dummy_model"
371
-
372
- def __call__(self) -> None:
373
- pass
355
+ def dummy_model() -> None:
356
+ return None
374
357
 
358
+ dummy_model.name = "dummy_model"
375
359
  return CallbackConversationBot(
376
360
  callback=target,
377
361
  role=role,
378
- model=DummyModel(),
362
+ model=dummy_model,
379
363
  user_template=str(template),
380
364
  user_template_parameters=parameters,
381
365
  conversation_template="",
382
366
  instantiation_parameters={},
383
367
  )
384
-
385
- msg = "Invalid value for enum ConversationRole. This should never happen."
386
- raise EvaluationException(
387
- message=msg,
388
- internal_message=msg,
389
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
390
- category=ErrorCategory.INVALID_VALUE,
391
- blame=ErrorBlame.SYSTEM_ERROR,
368
+ return ConversationBot(
369
+ role=role,
370
+ model=model,
371
+ conversation_template=template,
372
+ instantiation_parameters=parameters,
392
373
  )
393
374
 
394
- def _join_conversation_starter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
395
- key: Literal["conversation_starter"] = "conversation_starter"
375
+ def _join_conversation_starter(self, parameters, to_join):
376
+ key = "conversation_starter"
396
377
  if key in parameters.keys():
397
378
  parameters[key] = f"{to_join} {parameters[key]}"
398
379
  else:
@@ -5,17 +5,7 @@ from enum import Enum
5
5
 
6
6
 
7
7
  class SupportedLanguages(Enum):
8
- """Supported languages for evaluation, using ISO standard language codes.
9
-
10
- .. admonition:: Example:
11
-
12
- .. literalinclude:: ../samples/evaluation_samples_simulate.py
13
- :start-after: [START supported_languages]
14
- :end-before: [END supported_languages]
15
- :language: python
16
- :dedent: 8
17
- :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
18
- """
8
+ """Supported languages for evaluation, using ISO standard language codes."""
19
9
 
20
10
  Spanish = "es"
21
11
  Italian = "it"
@@ -7,7 +7,7 @@ import copy
7
7
  import logging
8
8
  import time
9
9
  from dataclasses import dataclass
10
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
11
11
 
12
12
  import jinja2
13
13
 
@@ -15,7 +15,6 @@ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarg
15
15
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
16
16
 
17
17
  from .._model_tools import LLMBase, OpenAIChatCompletionsModel
18
- from .._model_tools._template_handler import TemplateParameters
19
18
  from .constants import ConversationRole
20
19
 
21
20
 
@@ -41,7 +40,7 @@ class ConversationTurn:
41
40
  role: "ConversationRole"
42
41
  name: Optional[str] = None
43
42
  message: str = ""
44
- full_response: Optional[Dict[str, Any]] = None
43
+ full_response: Optional[Any] = None
45
44
  request: Optional[Any] = None
46
45
 
47
46
  def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
@@ -110,7 +109,7 @@ class ConversationBot:
110
109
  role: ConversationRole,
111
110
  model: Union[LLMBase, OpenAIChatCompletionsModel],
112
111
  conversation_template: str,
113
- instantiation_parameters: TemplateParameters,
112
+ instantiation_parameters: Dict[str, str],
114
113
  ) -> None:
115
114
  self.role = role
116
115
  self.conversation_template_orig = conversation_template
@@ -119,13 +118,13 @@ class ConversationBot:
119
118
  )
120
119
  self.persona_template_args = instantiation_parameters
121
120
  if self.role == ConversationRole.USER:
122
- self.name: str = cast(str, self.persona_template_args.get("name", role.value))
121
+ self.name = self.persona_template_args.get("name", role.value)
123
122
  else:
124
- self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
123
+ self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
125
124
  self.model = model
126
125
 
127
126
  self.logger = logging.getLogger(repr(self))
128
- self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
127
+ self.conversation_starter = None # can either be a dictionary or jinja template
129
128
  if role == ConversationRole.USER:
130
129
  if "conversation_starter" in self.persona_template_args:
131
130
  conversation_starter_content = self.persona_template_args["conversation_starter"]
@@ -149,7 +148,7 @@ class ConversationBot:
149
148
  conversation_history: List[ConversationTurn],
150
149
  max_history: int,
151
150
  turn_number: int = 0,
152
- ) -> Tuple[dict, dict, float, dict]:
151
+ ) -> Tuple[dict, dict, int, dict]:
153
152
  """
154
153
  Prompt the ConversationBot for a response.
155
154
 
@@ -162,7 +161,7 @@ class ConversationBot:
162
161
  :param turn_number: Parameters used to query GPT-4 model.
163
162
  :type turn_number: int
164
163
  :return: The response from the ConversationBot.
165
- :rtype: Tuple[dict, dict, float, dict]
164
+ :rtype: Tuple[dict, dict, int, dict]
166
165
  """
167
166
 
168
167
  # check if this is the first turn and the conversation_starter is not None,
@@ -170,11 +169,11 @@ class ConversationBot:
170
169
  if turn_number == 0 and self.conversation_starter is not None:
171
170
  # if conversation_starter is a dictionary, pass it into samples as is
172
171
  if isinstance(self.conversation_starter, dict):
173
- samples: List[Union[str, jinja2.Template, Dict]] = [self.conversation_starter]
172
+ samples = [self.conversation_starter]
174
173
  if isinstance(self.conversation_starter, jinja2.Template):
175
174
  samples = [self.conversation_starter.render(**self.persona_template_args)]
176
175
  else:
177
- samples = [self.conversation_starter]
176
+ samples = [self.conversation_starter] # type: ignore[attr-defined]
178
177
  time_taken = 0
179
178
 
180
179
  finish_reason = ["stop"]
@@ -239,7 +238,7 @@ class CallbackConversationBot(ConversationBot):
239
238
  self,
240
239
  callback: Callable,
241
240
  user_template: str,
242
- user_template_parameters: TemplateParameters,
241
+ user_template_parameters: Dict,
243
242
  *args,
244
243
  **kwargs,
245
244
  ) -> None:
@@ -255,7 +254,7 @@ class CallbackConversationBot(ConversationBot):
255
254
  conversation_history: List[Any],
256
255
  max_history: int,
257
256
  turn_number: int = 0,
258
- ) -> Tuple[dict, dict, float, dict]:
257
+ ) -> Tuple[dict, dict, int, dict]:
259
258
  chat_protocol_message = self._to_chat_protocol(
260
259
  self.user_template, conversation_history, self.user_template_parameters
261
260
  )
@@ -4,7 +4,7 @@
4
4
 
5
5
  import asyncio
6
6
  import logging
7
- from typing import Callable, Dict, List, Optional, Tuple, Union
7
+ from typing import Callable, Dict, List, Tuple, Union
8
8
 
9
9
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
10
10
  from azure.ai.evaluation.simulator._constants import SupportedLanguages
@@ -80,7 +80,7 @@ async def simulate_conversation(
80
80
  history_limit: int = 5,
81
81
  api_call_delay_sec: float = 0,
82
82
  logger: logging.Logger = logging.getLogger(__name__),
83
- ) -> Tuple[Optional[str], List[ConversationTurn]]:
83
+ ) -> Tuple:
84
84
  """
85
85
  Simulate a conversation between the given bots.
86
86
 
@@ -99,7 +99,7 @@ async def simulate_conversation(
99
99
  :keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
100
100
  :paramtype logger: logging.Logger
101
101
  :return: Simulation a conversation between the given bots.
102
- :rtype: Tuple[Optional[str], List[ConversationTurn]]
102
+ :rtype: Tuple
103
103
  """
104
104
 
105
105
  # Read the first prompt.
@@ -110,7 +110,7 @@ async def simulate_conversation(
110
110
  turn_number=0,
111
111
  )
112
112
  if "id" in first_response:
113
- conversation_id: Optional[str] = first_response["id"]
113
+ conversation_id = first_response["id"]
114
114
  else:
115
115
  conversation_id = None
116
116
  first_prompt = first_response["samples"][0]
@@ -1,18 +1,18 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- # pylint: disable=C0301,C0114,R0913,R0903
5
4
  # noqa: E501
5
+ import functools
6
6
  import logging
7
7
  from random import randint
8
- from typing import Callable, Optional, cast
8
+ from typing import Callable, Optional
9
+
10
+ from promptflow._sdk._telemetry import ActivityType, monitor_operation
9
11
 
10
- from azure.ai.evaluation._common._experimental import experimental
11
- from azure.ai.evaluation._common.utils import validate_azure_ai_project
12
12
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
13
- from azure.ai.evaluation.simulator import AdversarialScenario
14
13
  from azure.ai.evaluation._model_configurations import AzureAIProject
15
- from azure.core.credentials import TokenCredential
14
+ from azure.ai.evaluation.simulator import AdversarialScenario
15
+ from azure.identity import DefaultAzureCredential
16
16
 
17
17
  from ._adversarial_simulator import AdversarialSimulator
18
18
  from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
@@ -20,7 +20,35 @@ from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenMan
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
22
 
23
- @experimental
23
+ def monitor_adversarial_scenario(func) -> Callable:
24
+ """Decorator to monitor adversarial scenario.
25
+
26
+ :param func: The function to be decorated.
27
+ :type func: Callable
28
+ :return: The decorated function.
29
+ :rtype: Callable
30
+ """
31
+
32
+ @functools.wraps(func)
33
+ def wrapper(*args, **kwargs):
34
+ scenario = str(kwargs.get("scenario", None))
35
+ max_conversation_turns = kwargs.get("max_conversation_turns", None)
36
+ max_simulation_results = kwargs.get("max_simulation_results", None)
37
+ decorated_func = monitor_operation(
38
+ activity_name="jailbreak.adversarial.simulator.call",
39
+ activity_type=ActivityType.PUBLICAPI,
40
+ custom_dimensions={
41
+ "scenario": scenario,
42
+ "max_conversation_turns": max_conversation_turns,
43
+ "max_simulation_results": max_simulation_results,
44
+ },
45
+ )(func)
46
+
47
+ return decorated_func(*args, **kwargs)
48
+
49
+ return wrapper
50
+
51
+
24
52
  class DirectAttackSimulator:
25
53
  """
26
54
  Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
@@ -31,39 +59,44 @@ class DirectAttackSimulator:
31
59
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
32
60
  :param credential: The credential for connecting to Azure AI project.
33
61
  :type credential: ~azure.core.credentials.TokenCredential
34
-
35
- .. admonition:: Example:
36
-
37
- .. literalinclude:: ../samples/evaluation_samples_simulate.py
38
- :start-after: [START direct_attack_simulator]
39
- :end-before: [END direct_attack_simulator]
40
- :language: python
41
- :dedent: 8
42
- :caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
43
62
  """
44
63
 
45
- def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
64
+ def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
46
65
  """Constructor."""
47
-
48
- try:
49
- self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
50
- except EvaluationException as e:
66
+ # check that azure_ai_project has the keys: subscription_id, resource_group_name, project_name
67
+ if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
68
+ msg = "azure_ai_project must contain keys: subscription_id, resource_group_name and project_name"
51
69
  raise EvaluationException(
52
- message=e.message,
53
- internal_message=e.internal_message,
70
+ message=msg,
71
+ internal_message=msg,
72
+ target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
73
+ category=ErrorCategory.MISSING_FIELD,
74
+ blame=ErrorBlame.USER_ERROR,
75
+ )
76
+ # check the value of the keys in azure_ai_project is not none
77
+ if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
78
+ msg = "subscription_id, resource_group_name and project_name keys cannot be None"
79
+ raise EvaluationException(
80
+ message=msg,
81
+ internal_message=msg,
54
82
  target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
55
- category=e.category,
56
- blame=e.blame,
57
- ) from e
58
- self.credential = cast(TokenCredential, credential)
83
+ category=ErrorCategory.MISSING_FIELD,
84
+ blame=ErrorBlame.USER_ERROR,
85
+ )
86
+ if "credential" not in azure_ai_project and not credential:
87
+ credential = DefaultAzureCredential()
88
+ elif "credential" in azure_ai_project:
89
+ credential = azure_ai_project["credential"]
90
+ self.credential = credential
91
+ self.azure_ai_project = azure_ai_project
59
92
  self.token_manager = ManagedIdentityAPITokenManager(
60
93
  token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
61
94
  logger=logging.getLogger("AdversarialSimulator"),
62
- credential=self.credential,
95
+ credential=credential,
63
96
  )
64
- self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
97
+ self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
65
98
  self.adversarial_template_handler = AdversarialTemplateHandler(
66
- azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
99
+ azure_ai_project=azure_ai_project, rai_client=self.rai_client
67
100
  )
68
101
 
69
102
  def _ensure_service_dependencies(self):
@@ -77,6 +110,7 @@ class DirectAttackSimulator:
77
110
  blame=ErrorBlame.USER_ERROR,
78
111
  )
79
112
 
113
+ # @monitor_adversarial_scenario
80
114
  async def __call__(
81
115
  self,
82
116
  *,
@@ -135,7 +169,7 @@ class DirectAttackSimulator:
135
169
  - '**$schema**': A string indicating the schema URL for the conversation format.
136
170
 
137
171
  The 'content' for 'assistant' role messages may include the messages that your callback returned.
138
- :rtype: Dict[str, [List[Dict[str, Any]]]]
172
+ :rtype: Dict[str, [List[Dict[str, Any]]]] with two elements
139
173
 
140
174
  **Output format**
141
175
 
@@ -198,7 +232,7 @@ class DirectAttackSimulator:
198
232
  api_call_retry_sleep_sec=api_call_retry_sleep_sec,
199
233
  api_call_delay_sec=api_call_delay_sec,
200
234
  concurrent_async_task=concurrent_async_task,
201
- randomize_order=False,
235
+ randomize_order=True,
202
236
  randomization_seed=randomization_seed,
203
237
  )
204
238
  jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
@@ -212,7 +246,7 @@ class DirectAttackSimulator:
212
246
  api_call_delay_sec=api_call_delay_sec,
213
247
  concurrent_async_task=concurrent_async_task,
214
248
  _jailbreak_type="upia",
215
- randomize_order=False,
249
+ randomize_order=True,
216
250
  randomization_seed=randomization_seed,
217
251
  )
218
252
  return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
@@ -1,4 +1,5 @@
1
+ from ._experimental import experimental
1
2
  from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
2
3
  from ._simulator_data_classes import ConversationHistory, Turn
3
4
 
4
- __all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
5
+ __all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING", "experimental"]