azure-ai-evaluation 1.0.0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (108)
  1. azure/ai/evaluation/__init__.py +4 -26
  2. azure/ai/evaluation/_common/constants.py +2 -9
  3. azure/ai/evaluation/_common/rai_service.py +122 -302
  4. azure/ai/evaluation/_common/utils.py +35 -393
  5. azure/ai/evaluation/_constants.py +6 -28
  6. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/__init__.py +2 -3
  7. azure/ai/evaluation/_evaluate/{_batch_run/eval_run_context.py → _batch_run_client/batch_run_context.py} +8 -25
  8. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py +30 -68
  9. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +61 -0
  10. azure/ai/evaluation/_evaluate/_eval_run.py +40 -117
  11. azure/ai/evaluation/_evaluate/_evaluate.py +255 -416
  12. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +19 -24
  13. azure/ai/evaluation/_evaluate/_utils.py +47 -108
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +19 -18
  15. azure/ai/evaluation/_evaluators/{_retrieval → _chat}/__init__.py +2 -2
  16. azure/ai/evaluation/_evaluators/_chat/_chat.py +350 -0
  17. azure/ai/evaluation/_evaluators/{_service_groundedness → _chat/retrieval}/__init__.py +2 -2
  18. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +163 -0
  19. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
  20. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +93 -78
  21. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +39 -76
  22. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +4 -0
  23. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +68 -104
  24. azure/ai/evaluation/_evaluators/{_multimodal/_content_safety_multimodal_base.py → _content_safety/_content_safety_base.py} +35 -24
  25. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +296 -0
  26. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +54 -105
  27. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +52 -99
  28. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +52 -101
  29. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +51 -101
  30. azure/ai/evaluation/_evaluators/_eci/_eci.py +55 -45
  31. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -36
  32. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +94 -76
  33. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +41 -66
  34. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +17 -15
  35. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +92 -113
  36. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
  37. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +27 -21
  38. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +80 -89
  39. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
  40. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
  41. azure/ai/evaluation/_evaluators/_qa/_qa.py +43 -25
  42. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +101 -84
  43. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +47 -78
  44. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +27 -27
  45. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +45 -55
  46. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +5 -0
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +106 -91
  48. azure/ai/evaluation/_exceptions.py +7 -28
  49. azure/ai/evaluation/_http_utils.py +134 -205
  50. azure/ai/evaluation/_model_configurations.py +8 -104
  51. azure/ai/evaluation/_version.py +1 -1
  52. azure/ai/evaluation/simulator/__init__.py +2 -3
  53. azure/ai/evaluation/simulator/_adversarial_scenario.py +1 -20
  54. azure/ai/evaluation/simulator/_adversarial_simulator.py +95 -116
  55. azure/ai/evaluation/simulator/_constants.py +1 -11
  56. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -14
  57. azure/ai/evaluation/simulator/_conversation/_conversation.py +20 -20
  58. azure/ai/evaluation/simulator/_direct_attack_simulator.py +68 -34
  59. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -1
  60. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +28 -31
  61. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +95 -108
  62. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +22 -70
  63. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +14 -30
  64. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +14 -25
  65. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +24 -68
  66. azure/ai/evaluation/simulator/_model_tools/models.py +21 -19
  67. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +10 -6
  68. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +5 -6
  69. azure/ai/evaluation/simulator/_tracing.py +28 -25
  70. azure/ai/evaluation/simulator/_utils.py +13 -34
  71. azure/ai/evaluation/simulator/simulator.py +579 -0
  72. azure_ai_evaluation-1.0.0b1.dist-info/METADATA +377 -0
  73. azure_ai_evaluation-1.0.0b1.dist-info/RECORD +97 -0
  74. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b1.dist-info}/WHEEL +1 -1
  75. azure/ai/evaluation/_common/_experimental.py +0 -172
  76. azure/ai/evaluation/_common/math.py +0 -89
  77. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -99
  78. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -46
  79. azure/ai/evaluation/_evaluators/_common/__init__.py +0 -13
  80. azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -344
  81. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -88
  82. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -133
  83. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -113
  84. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -99
  85. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  86. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  87. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  88. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  89. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  90. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  91. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  92. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -112
  93. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -93
  94. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -148
  95. azure/ai/evaluation/_vendor/__init__.py +0 -3
  96. azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -14
  97. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -328
  98. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -63
  99. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -63
  100. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -53
  101. azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  102. azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -1150
  103. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  104. azure/ai/evaluation/simulator/_simulator.py +0 -716
  105. azure_ai_evaluation-1.0.0.dist-info/METADATA +0 -595
  106. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +0 -70
  107. azure_ai_evaluation-1.0.0.dist-info/RECORD +0 -119
  108. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b1.dist-info}/top_level.txt +0 -0
@@ -6,23 +6,19 @@
6
6
  import asyncio
7
7
  import logging
8
8
  import random
9
- from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
10
- from itertools import zip_longest
9
+ from typing import Any, Callable, Dict, List, Optional
11
10
 
11
+ from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
12
+ from azure.identity import DefaultAzureCredential
12
13
  from tqdm import tqdm
13
14
 
14
- from azure.ai.evaluation._common._experimental import experimental
15
- from azure.ai.evaluation._common.utils import validate_azure_ai_project
16
- from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
15
  from azure.ai.evaluation._http_utils import get_async_http_client
16
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
18
17
  from azure.ai.evaluation._model_configurations import AzureAIProject
19
18
  from azure.ai.evaluation.simulator import AdversarialScenario
20
19
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
21
- from azure.core.credentials import TokenCredential
22
- from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
23
20
 
24
- from ._constants import SupportedLanguages
25
- from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
21
+ from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
26
22
  from ._conversation._conversation import simulate_conversation
27
23
  from ._model_tools import (
28
24
  AdversarialTemplateHandler,
@@ -31,13 +27,13 @@ from ._model_tools import (
31
27
  RAIClient,
32
28
  TokenScope,
33
29
  )
34
- from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
30
+ from ._tracing import monitor_adversarial_scenario
35
31
  from ._utils import JsonLineList
32
+ from ._constants import SupportedLanguages
36
33
 
37
34
  logger = logging.getLogger(__name__)
38
35
 
39
36
 
40
- @experimental
41
37
  class AdversarialSimulator:
42
38
  """
43
39
  Initializes the adversarial simulator with a project scope.
@@ -47,40 +43,43 @@ class AdversarialSimulator:
47
43
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
48
44
  :param credential: The credential for connecting to Azure AI project.
49
45
  :type credential: ~azure.core.credentials.TokenCredential
50
-
51
- .. admonition:: Example:
52
-
53
- .. literalinclude:: ../samples/evaluation_samples_simulate.py
54
- :start-after: [START adversarial_scenario]
55
- :end-before: [END adversarial_scenario]
56
- :language: python
57
- :dedent: 8
58
- :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
59
- 2 conversation turns each (4 messages per result).
60
46
  """
61
47
 
62
- def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
48
+ def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
63
49
  """Constructor."""
64
-
65
- try:
66
- self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
67
- except EvaluationException as e:
50
+ # check if azure_ai_project has the keys: subscription_id, resource_group_name and project_name
51
+ if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
52
+ msg = "azure_ai_project must contain keys: subscription_id, resource_group_name, project_name"
68
53
  raise EvaluationException(
69
- message=e.message,
70
- internal_message=e.internal_message,
54
+ message=msg,
55
+ internal_message=msg,
71
56
  target=ErrorTarget.ADVERSARIAL_SIMULATOR,
72
- category=e.category,
73
- blame=e.blame,
74
- ) from e
75
-
57
+ category=ErrorCategory.MISSING_FIELD,
58
+ blame=ErrorBlame.USER_ERROR,
59
+ )
60
+ # check the value of the keys in azure_ai_project is not none
61
+ if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
62
+ msg = "subscription_id, resource_group_name and project_name cannot be None"
63
+ raise EvaluationException(
64
+ message=msg,
65
+ internal_message=msg,
66
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
67
+ category=ErrorCategory.MISSING_FIELD,
68
+ blame=ErrorBlame.USER_ERROR,
69
+ )
70
+ if "credential" not in azure_ai_project and not credential:
71
+ credential = DefaultAzureCredential()
72
+ elif "credential" in azure_ai_project:
73
+ credential = azure_ai_project["credential"]
74
+ self.azure_ai_project = azure_ai_project
76
75
  self.token_manager = ManagedIdentityAPITokenManager(
77
76
  token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
78
77
  logger=logging.getLogger("AdversarialSimulator"),
79
- credential=cast(TokenCredential, credential),
78
+ credential=credential,
80
79
  )
81
- self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
80
+ self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
82
81
  self.adversarial_template_handler = AdversarialTemplateHandler(
83
- azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
82
+ azure_ai_project=azure_ai_project, rai_client=self.rai_client
84
83
  )
85
84
 
86
85
  def _ensure_service_dependencies(self):
@@ -94,7 +93,7 @@ class AdversarialSimulator:
94
93
  blame=ErrorBlame.USER_ERROR,
95
94
  )
96
95
 
97
- # pylint: disable=too-many-locals
96
+ # @monitor_adversarial_scenario
98
97
  async def __call__(
99
98
  self,
100
99
  *,
@@ -108,10 +107,10 @@ class AdversarialSimulator:
108
107
  api_call_retry_sleep_sec: int = 1,
109
108
  api_call_delay_sec: int = 0,
110
109
  concurrent_async_task: int = 3,
110
+ _jailbreak_type: Optional[str] = None,
111
111
  language: SupportedLanguages = SupportedLanguages.English,
112
112
  randomize_order: bool = True,
113
113
  randomization_seed: Optional[int] = None,
114
- **kwargs,
115
114
  ):
116
115
  """
117
116
  Executes the adversarial simulation against a specified target function asynchronously.
@@ -161,6 +160,28 @@ class AdversarialSimulator:
161
160
 
162
161
  The 'content' for 'assistant' role messages may includes the messages that your callback returned.
163
162
  :rtype: List[Dict[str, Any]]
163
+
164
+ **Output format**
165
+
166
+ .. code-block:: python
167
+
168
+ return_value = [
169
+ {
170
+ 'template_parameters': {},
171
+ 'messages': [
172
+ {
173
+ 'content': '<jailbreak prompt> <adversarial query>',
174
+ 'role': 'user'
175
+ },
176
+ {
177
+ 'content': "<response from endpoint>",
178
+ 'role': 'assistant',
179
+ 'context': None
180
+ }
181
+ ],
182
+ '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
183
+ }
184
+ ]
164
185
  """
165
186
 
166
187
  # validate the inputs
@@ -196,7 +217,6 @@ class AdversarialSimulator:
196
217
  total_tasks,
197
218
  )
198
219
  total_tasks = min(total_tasks, max_simulation_results)
199
- _jailbreak_type = kwargs.get("_jailbreak_type", None)
200
220
  if _jailbreak_type:
201
221
  jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
202
222
  progress_bar = tqdm(
@@ -205,18 +225,17 @@ class AdversarialSimulator:
205
225
  ncols=100,
206
226
  unit="simulations",
207
227
  )
208
-
209
- if randomize_order:
210
- # The template parameter lists are persistent across sim runs within a session,
211
- # So randomize a the selection instead of the parameter list directly,
212
- # or a potentially large deep copy.
213
- if randomization_seed is not None:
214
- random.seed(randomization_seed)
215
- random.shuffle(templates)
216
- parameter_lists = [t.template_parameters for t in templates]
217
- zipped_parameters = list(zip_longest(*parameter_lists))
218
- for param_group in zipped_parameters:
219
- for template, parameter in zip(templates, param_group):
228
+ for template in templates:
229
+ parameter_order = list(range(len(template.template_parameters)))
230
+ if randomize_order:
231
+ # The template parameter lists are persistent across sim runs within a session,
232
+ # So randomize a the selection instead of the parameter list directly,
233
+ # or a potentially large deep copy.
234
+ if randomization_seed is not None:
235
+ random.seed(randomization_seed)
236
+ random.shuffle(parameter_order)
237
+ for index in parameter_order:
238
+ parameter = template.template_parameters[index].copy()
220
239
  if _jailbreak_type == "upia":
221
240
  parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
222
241
  tasks.append(
@@ -245,21 +264,16 @@ class AdversarialSimulator:
245
264
 
246
265
  return JsonLineList(sim_results)
247
266
 
248
- def _to_chat_protocol(
249
- self,
250
- *,
251
- conversation_history: List[ConversationTurn],
252
- template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
253
- ):
267
+ def _to_chat_protocol(self, *, conversation_history, template_parameters: Dict = None):
254
268
  if template_parameters is None:
255
269
  template_parameters = {}
256
270
  messages = []
257
271
  for _, m in enumerate(conversation_history):
258
272
  message = {"content": m.message, "role": m.role.value}
259
- if m.full_response is not None and "context" in m.full_response:
273
+ if "context" in m.full_response:
260
274
  message["context"] = m.full_response["context"]
261
275
  messages.append(message)
262
- conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
276
+ conversation_category = template_parameters.pop("metadata", {}).get("Category")
263
277
  template_parameters["metadata"] = {}
264
278
  for key in (
265
279
  "conversation_starter",
@@ -267,9 +281,6 @@ class AdversarialSimulator:
267
281
  "target_population",
268
282
  "topic",
269
283
  "ch_template_placeholder",
270
- "chatbot_name",
271
- "name",
272
- "group",
273
284
  ):
274
285
  template_parameters.pop(key, None)
275
286
  if conversation_category:
@@ -284,14 +295,14 @@ class AdversarialSimulator:
284
295
  self,
285
296
  *,
286
297
  target: Callable,
287
- template: AdversarialTemplate,
288
- parameters: TemplateParameters,
289
- max_conversation_turns: int,
290
- api_call_retry_limit: int,
291
- api_call_retry_sleep_sec: int,
292
- api_call_delay_sec: int,
293
- language: SupportedLanguages,
294
- semaphore: asyncio.Semaphore,
298
+ template,
299
+ parameters,
300
+ max_conversation_turns,
301
+ api_call_retry_limit,
302
+ api_call_retry_sleep_sec,
303
+ api_call_delay_sec,
304
+ language,
305
+ semaphore,
295
306
  ) -> List[Dict]:
296
307
  user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
297
308
  system_bot = self._setup_bot(
@@ -314,15 +325,9 @@ class AdversarialSimulator:
314
325
  api_call_delay_sec=api_call_delay_sec,
315
326
  language=language,
316
327
  )
328
+ return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)
317
329
 
318
- return self._to_chat_protocol(
319
- conversation_history=conversation_history,
320
- template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
321
- )
322
-
323
- def _get_user_proxy_completion_model(
324
- self, template_key: str, template_parameters: TemplateParameters
325
- ) -> ProxyChatCompletionsModel:
330
+ def _get_user_proxy_completion_model(self, template_key, template_parameters):
326
331
  return ProxyChatCompletionsModel(
327
332
  name="raisvc_proxy_model",
328
333
  template_key=template_key,
@@ -334,15 +339,8 @@ class AdversarialSimulator:
334
339
  temperature=0.0,
335
340
  )
336
341
 
337
- def _setup_bot(
338
- self,
339
- *,
340
- role: ConversationRole,
341
- template: AdversarialTemplate,
342
- parameters: TemplateParameters,
343
- target: Optional[Callable] = None,
344
- ) -> ConversationBot:
345
- if role is ConversationRole.USER:
342
+ def _setup_bot(self, *, role, template, parameters, target: Callable = None):
343
+ if role == ConversationRole.USER:
346
344
  model = self._get_user_proxy_completion_model(
347
345
  template_key=template.template_name, template_parameters=parameters
348
346
  )
@@ -353,46 +351,27 @@ class AdversarialSimulator:
353
351
  instantiation_parameters=parameters,
354
352
  )
355
353
 
356
- if role is ConversationRole.ASSISTANT:
357
- if target is None:
358
- msg = "Cannot setup system bot. Target is None"
359
-
360
- raise EvaluationException(
361
- message=msg,
362
- internal_message=msg,
363
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
364
- error_category=ErrorCategory.INVALID_VALUE,
365
- blame=ErrorBlame.SYSTEM_ERROR,
366
- )
367
-
368
- class DummyModel:
369
- def __init__(self):
370
- self.name = "dummy_model"
371
-
372
- def __call__(self) -> None:
373
- pass
374
-
354
+ if role == ConversationRole.ASSISTANT:
355
+ dummy_model = lambda: None # noqa: E731
356
+ dummy_model.name = "dummy_model"
375
357
  return CallbackConversationBot(
376
358
  callback=target,
377
359
  role=role,
378
- model=DummyModel(),
360
+ model=dummy_model,
379
361
  user_template=str(template),
380
362
  user_template_parameters=parameters,
381
363
  conversation_template="",
382
364
  instantiation_parameters={},
383
365
  )
384
-
385
- msg = "Invalid value for enum ConversationRole. This should never happen."
386
- raise EvaluationException(
387
- message=msg,
388
- internal_message=msg,
389
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
390
- category=ErrorCategory.INVALID_VALUE,
391
- blame=ErrorBlame.SYSTEM_ERROR,
366
+ return ConversationBot(
367
+ role=role,
368
+ model=model,
369
+ conversation_template=template,
370
+ instantiation_parameters=parameters,
392
371
  )
393
372
 
394
- def _join_conversation_starter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
395
- key: Literal["conversation_starter"] = "conversation_starter"
373
+ def _join_conversation_starter(self, parameters, to_join):
374
+ key = "conversation_starter"
396
375
  if key in parameters.keys():
397
376
  parameters[key] = f"{to_join} {parameters[key]}"
398
377
  else:
@@ -5,17 +5,7 @@ from enum import Enum
5
5
 
6
6
 
7
7
  class SupportedLanguages(Enum):
8
- """Supported languages for evaluation, using ISO standard language codes.
9
-
10
- .. admonition:: Example:
11
-
12
- .. literalinclude:: ../samples/evaluation_samples_simulate.py
13
- :start-after: [START supported_languages]
14
- :end-before: [END supported_languages]
15
- :language: python
16
- :dedent: 8
17
- :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
18
- """
8
+ """Supported languages for evaluation, using ISO standard language codes."""
19
9
 
20
10
  Spanish = "es"
21
11
  Italian = "it"
@@ -7,15 +7,14 @@ import copy
7
7
  import logging
8
8
  import time
9
9
  from dataclasses import dataclass
10
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
11
11
 
12
12
  import jinja2
13
13
 
14
- from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
15
14
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
15
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
16
16
 
17
17
  from .._model_tools import LLMBase, OpenAIChatCompletionsModel
18
- from .._model_tools._template_handler import TemplateParameters
19
18
  from .constants import ConversationRole
20
19
 
21
20
 
@@ -41,7 +40,7 @@ class ConversationTurn:
41
40
  role: "ConversationRole"
42
41
  name: Optional[str] = None
43
42
  message: str = ""
44
- full_response: Optional[Dict[str, Any]] = None
43
+ full_response: Optional[Any] = None
45
44
  request: Optional[Any] = None
46
45
 
47
46
  def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
@@ -110,7 +109,7 @@ class ConversationBot:
110
109
  role: ConversationRole,
111
110
  model: Union[LLMBase, OpenAIChatCompletionsModel],
112
111
  conversation_template: str,
113
- instantiation_parameters: TemplateParameters,
112
+ instantiation_parameters: Dict[str, str],
114
113
  ) -> None:
115
114
  self.role = role
116
115
  self.conversation_template_orig = conversation_template
@@ -119,13 +118,13 @@ class ConversationBot:
119
118
  )
120
119
  self.persona_template_args = instantiation_parameters
121
120
  if self.role == ConversationRole.USER:
122
- self.name: str = cast(str, self.persona_template_args.get("name", role.value))
121
+ self.name = self.persona_template_args.get("name", role.value)
123
122
  else:
124
- self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
123
+ self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
125
124
  self.model = model
126
125
 
127
126
  self.logger = logging.getLogger(repr(self))
128
- self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
127
+ self.conversation_starter = None # can either be a dictionary or jinja template
129
128
  if role == ConversationRole.USER:
130
129
  if "conversation_starter" in self.persona_template_args:
131
130
  conversation_starter_content = self.persona_template_args["conversation_starter"]
@@ -149,7 +148,7 @@ class ConversationBot:
149
148
  conversation_history: List[ConversationTurn],
150
149
  max_history: int,
151
150
  turn_number: int = 0,
152
- ) -> Tuple[dict, dict, float, dict]:
151
+ ) -> Tuple[dict, dict, int, dict]:
153
152
  """
154
153
  Prompt the ConversationBot for a response.
155
154
 
@@ -162,7 +161,7 @@ class ConversationBot:
162
161
  :param turn_number: Parameters used to query GPT-4 model.
163
162
  :type turn_number: int
164
163
  :return: The response from the ConversationBot.
165
- :rtype: Tuple[dict, dict, float, dict]
164
+ :rtype: Tuple[dict, dict, int, dict]
166
165
  """
167
166
 
168
167
  # check if this is the first turn and the conversation_starter is not None,
@@ -170,11 +169,11 @@ class ConversationBot:
170
169
  if turn_number == 0 and self.conversation_starter is not None:
171
170
  # if conversation_starter is a dictionary, pass it into samples as is
172
171
  if isinstance(self.conversation_starter, dict):
173
- samples: List[Union[str, jinja2.Template, Dict]] = [self.conversation_starter]
172
+ samples = [self.conversation_starter]
174
173
  if isinstance(self.conversation_starter, jinja2.Template):
175
174
  samples = [self.conversation_starter.render(**self.persona_template_args)]
176
175
  else:
177
- samples = [self.conversation_starter]
176
+ samples = [self.conversation_starter] # type: ignore[attr-defined]
178
177
  time_taken = 0
179
178
 
180
179
  finish_reason = ["stop"]
@@ -239,7 +238,7 @@ class CallbackConversationBot(ConversationBot):
239
238
  self,
240
239
  callback: Callable,
241
240
  user_template: str,
242
- user_template_parameters: TemplateParameters,
241
+ user_template_parameters: Dict,
243
242
  *args,
244
243
  **kwargs,
245
244
  ) -> None:
@@ -255,7 +254,7 @@ class CallbackConversationBot(ConversationBot):
255
254
  conversation_history: List[Any],
256
255
  max_history: int,
257
256
  turn_number: int = 0,
258
- ) -> Tuple[dict, dict, float, dict]:
257
+ ) -> Tuple[dict, dict, int, dict]:
259
258
  chat_protocol_message = self._to_chat_protocol(
260
259
  self.user_template, conversation_history, self.user_template_parameters
261
260
  )
@@ -4,14 +4,14 @@
4
4
 
5
5
  import asyncio
6
6
  import logging
7
- from typing import Callable, Dict, List, Optional, Tuple, Union
7
+ from typing import Callable, Dict, List, Tuple, Union
8
8
 
9
- from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
10
- from azure.ai.evaluation.simulator._constants import SupportedLanguages
11
9
  from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
10
+ from azure.ai.evaluation.simulator._constants import SupportedLanguages
12
11
 
13
12
  from ..._http_utils import AsyncHttpPipeline
14
13
  from . import ConversationBot, ConversationTurn
14
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
15
15
 
16
16
 
17
17
  def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
@@ -80,26 +80,26 @@ async def simulate_conversation(
80
80
  history_limit: int = 5,
81
81
  api_call_delay_sec: float = 0,
82
82
  logger: logging.Logger = logging.getLogger(__name__),
83
- ) -> Tuple[Optional[str], List[ConversationTurn]]:
83
+ ) -> Tuple:
84
84
  """
85
85
  Simulate a conversation between the given bots.
86
86
 
87
- :keyword bots: List of ConversationBot instances participating in the conversation.
88
- :paramtype bots: List[ConversationBot]
89
- :keyword session: The session to use for making API calls.
90
- :paramtype session: AsyncHttpPipeline
91
- :keyword stopping_criteria: A callable that determines when the conversation should stop.
92
- :paramtype stopping_criteria: Callable[[str], bool]
93
- :keyword turn_limit: The maximum number of turns in the conversation. Defaults to 10.
94
- :paramtype turn_limit: int
95
- :keyword history_limit: The maximum number of turns to keep in the conversation history. Defaults to 5.
96
- :paramtype history_limit: int
97
- :keyword api_call_delay_sec: Delay between API calls in seconds. Defaults to 0.
98
- :paramtype api_call_delay_sec: float
99
- :keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
100
- :paramtype logger: logging.Logger
87
+ :param bots: List of ConversationBot instances participating in the conversation.
88
+ :type bots: List[ConversationBot]
89
+ :param session: The session to use for making API calls.
90
+ :type session: AsyncHttpPipeline
91
+ :param stopping_criteria: A callable that determines when the conversation should stop.
92
+ :type stopping_criteria: Callable[[str], bool]
93
+ :param turn_limit: The maximum number of turns in the conversation. Defaults to 10.
94
+ :type turn_limit: int
95
+ :param history_limit: The maximum number of turns to keep in the conversation history. Defaults to 5.
96
+ :type history_limit: int
97
+ :param api_call_delay_sec: Delay between API calls in seconds. Defaults to 0.
98
+ :type api_call_delay_sec: float
99
+ :param logger: The logger to use for logging. Defaults to the logger named after the current module.
100
+ :type logger: logging.Logger
101
101
  :return: Simulation a conversation between the given bots.
102
- :rtype: Tuple[Optional[str], List[ConversationTurn]]
102
+ :rtype: Tuple
103
103
  """
104
104
 
105
105
  # Read the first prompt.
@@ -110,7 +110,7 @@ async def simulate_conversation(
110
110
  turn_number=0,
111
111
  )
112
112
  if "id" in first_response:
113
- conversation_id: Optional[str] = first_response["id"]
113
+ conversation_id = first_response["id"]
114
114
  else:
115
115
  conversation_id = None
116
116
  first_prompt = first_response["samples"][0]