azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic; consult the registry's advisory page for more details.

Files changed (100)
  1. azure/ai/evaluation/__init__.py +60 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/constants.py +65 -0
  4. azure/ai/evaluation/_common/rai_service.py +452 -0
  5. azure/ai/evaluation/_common/utils.py +87 -0
  6. azure/ai/evaluation/_constants.py +50 -0
  7. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  8. azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py +8 -0
  9. azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +72 -0
  10. azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +150 -0
  11. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +61 -0
  12. azure/ai/evaluation/_evaluate/_eval_run.py +494 -0
  13. azure/ai/evaluation/_evaluate/_evaluate.py +689 -0
  14. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +174 -0
  15. azure/ai/evaluation/_evaluate/_utils.py +237 -0
  16. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  17. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  18. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +73 -0
  19. azure/ai/evaluation/_evaluators/_chat/__init__.py +9 -0
  20. azure/ai/evaluation/_evaluators/_chat/_chat.py +350 -0
  21. azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +9 -0
  22. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +163 -0
  23. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
  24. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  25. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +122 -0
  26. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +62 -0
  27. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +21 -0
  28. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +108 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +66 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +296 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +78 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +76 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +76 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +99 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +141 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +122 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +61 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +71 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +123 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
  47. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  48. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +96 -0
  49. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  50. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -0
  51. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
  52. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
  53. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  54. azure/ai/evaluation/_evaluators/_qa/_qa.py +111 -0
  55. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  56. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +131 -0
  57. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +69 -0
  58. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  59. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  60. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +130 -0
  62. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +71 -0
  63. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  64. azure/ai/evaluation/_evaluators/_xpia/xpia.py +140 -0
  65. azure/ai/evaluation/_exceptions.py +107 -0
  66. azure/ai/evaluation/_http_utils.py +395 -0
  67. azure/ai/evaluation/_model_configurations.py +27 -0
  68. azure/ai/evaluation/_user_agent.py +6 -0
  69. azure/ai/evaluation/_version.py +5 -0
  70. azure/ai/evaluation/py.typed +0 -0
  71. azure/ai/evaluation/simulator/__init__.py +15 -0
  72. azure/ai/evaluation/simulator/_adversarial_scenario.py +27 -0
  73. azure/ai/evaluation/simulator/_adversarial_simulator.py +450 -0
  74. azure/ai/evaluation/simulator/_constants.py +17 -0
  75. azure/ai/evaluation/simulator/_conversation/__init__.py +315 -0
  76. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  77. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  78. azure/ai/evaluation/simulator/_direct_attack_simulator.py +252 -0
  79. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  80. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  81. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +93 -0
  82. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +207 -0
  83. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  84. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +147 -0
  85. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +228 -0
  86. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +157 -0
  87. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +157 -0
  88. azure/ai/evaluation/simulator/_model_tools/models.py +616 -0
  89. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +69 -0
  90. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +36 -0
  91. azure/ai/evaluation/simulator/_tracing.py +92 -0
  92. azure/ai/evaluation/simulator/_utils.py +111 -0
  93. azure/ai/evaluation/simulator/simulator.py +579 -0
  94. azure_ai_evaluation-1.0.0b1.dist-info/METADATA +377 -0
  95. azure_ai_evaluation-1.0.0b1.dist-info/RECORD +97 -0
  96. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0b1.dist-info}/WHEEL +1 -1
  97. azure_ai_evaluation-1.0.0b1.dist-info/top_level.txt +1 -0
  98. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  99. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  100. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,315 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # noqa: E402
5
+
6
+ import copy
7
+ import logging
8
+ import time
9
+ from dataclasses import dataclass
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
11
+
12
+ import jinja2
13
+
14
+ from azure.ai.evaluation._http_utils import AsyncHttpPipeline
15
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
16
+
17
+ from .._model_tools import LLMBase, OpenAIChatCompletionsModel
18
+ from .constants import ConversationRole
19
+
20
+
21
@dataclass
class ConversationTurn:
    """A single turn (one exchange between the user and the chatbot) in a conversation.

    :param role: The role of the participant in the conversation. Accepted values are
        "user" and "assistant".
    :type role: ~azure.ai.evaluation.simulator._conversation.constants.ConversationRole
    :param name: The name of the participant in the conversation.
    :type name: Optional[str]
    :param message: The message exchanged in the conversation. Defaults to an empty string.
    :type message: str
    :param full_response: The full response.
    :type full_response: Optional[Any]
    :param request: The request.
    :type request: Optional[Any]
    """

    role: "ConversationRole"
    name: Optional[str] = None
    message: str = ""
    full_response: Optional[Any] = None
    request: Optional[Any] = None

    def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
        """Render this turn as an OpenAI chat message.

        OpenAI chat format is a dictionary with two keys: "role" and "content".

        :param reverse: Whether to swap the user/assistant roles. Defaults to False.
        :type reverse: bool
        :return: The conversation turn in the OpenAI chat format.
        :rtype: Dict[str, str]
        """
        if reverse is False:
            chat_role = self.role.value
        elif self.role == ConversationRole.ASSISTANT:
            # Reversed: this assistant turn is presented as a user message.
            chat_role = ConversationRole.USER.value
        else:
            chat_role = ConversationRole.ASSISTANT.value
        return {"role": chat_role, "content": self.message}

    def to_annotation_format(self, turn_number: int) -> Dict[str, Any]:
        """Render this turn in the annotation format.

        Annotation format is a dictionary with the following keys:
        - "turn_number": The turn number.
        - "response": The response.
        - "actor": The actor.
        - "request": The request.
        - "full_json_response": The full JSON response.

        :param turn_number: The turn number.
        :type turn_number: int
        :return: The conversation turn in the annotation format.
        :rtype: Dict[str, Any]
        """
        # Fall back to the role name when no participant name was given.
        actor = self.name if self.name is not None else self.role.value
        return {
            "turn_number": turn_number,
            "response": self.message,
            "actor": actor,
            "request": self.request,
            "full_json_response": self.full_response,
        }

    def __str__(self) -> str:
        return f"({self.role.value}): {self.message}"
87
+
88
+
89
class ConversationBot:
    """
    A conversation chat bot with a specific name, persona and a sentence that can be used as a conversation starter.

    :param role: The role of the bot in the conversation, either "user" or "assistant".
    :type role: ~azure.ai.evaluation.simulator._conversation.constants.ConversationRole
    :param model: The LLM model to use for generating responses.
    :type model: Union[
        ~azure.ai.evaluation.simulator._model_tools.LLMBase,
        ~azure.ai.evaluation.simulator._model_tools.OpenAIChatCompletionsModel
    ]
    :param conversation_template: A Jinja2 template describing the conversation to generate the prompt for the LLM
    :type conversation_template: str
    :param instantiation_parameters: A dictionary of parameters used to instantiate the conversation template
    :type instantiation_parameters: Dict[str, str]
    """

    def __init__(
        self,
        *,
        role: ConversationRole,
        model: Union[LLMBase, OpenAIChatCompletionsModel],
        conversation_template: str,
        instantiation_parameters: Dict[str, str],
    ) -> None:
        self.role = role
        self.conversation_template_orig = conversation_template
        # StrictUndefined makes missing template variables raise instead of
        # silently rendering empty strings.
        self.conversation_template: jinja2.Template = jinja2.Template(
            conversation_template, undefined=jinja2.StrictUndefined
        )
        self.persona_template_args = instantiation_parameters
        if self.role == ConversationRole.USER:
            self.name = self.persona_template_args.get("name", role.value)
        else:
            # NOTE(review): role.value is always truthy, so model.name is only used
            # when "chatbot_name" is present but falsy — confirm this is intended.
            self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
        self.model = model

        self.logger = logging.getLogger(repr(self))
        self.conversation_starter = None  # can either be a dictionary or jinja template
        if role == ConversationRole.USER:
            if "conversation_starter" in self.persona_template_args:
                conversation_starter_content = self.persona_template_args["conversation_starter"]
                if isinstance(conversation_starter_content, dict):
                    self.conversation_starter = conversation_starter_content
                else:
                    try:
                        self.conversation_starter = jinja2.Template(
                            conversation_starter_content, undefined=jinja2.StrictUndefined
                        )
                    except jinja2.exceptions.TemplateSyntaxError:
                        # Not a valid template; use the raw string verbatim.
                        self.conversation_starter = conversation_starter_content
            else:
                self.logger.info(
                    "This simulated bot will generate the first turn as no conversation starter is provided"
                )

    async def generate_response(
        self,
        session: AsyncHttpPipeline,
        conversation_history: List[ConversationTurn],
        max_history: int,
        turn_number: int = 0,
    ) -> Tuple[dict, dict, int, dict]:
        """
        Prompt the ConversationBot for a response.

        :param session: AsyncHttpPipeline to use for the request.
        :type session: AsyncHttpPipeline
        :param conversation_history: The turns in the conversation so far.
        :type conversation_history: List[ConversationTurn]
        :param max_history: Maximum number of trailing turns to include in the prompt.
        :type max_history: int
        :param turn_number: The current turn number (0 for the first turn).
        :type turn_number: int
        :return: Tuple of (parsed response, request, time taken in seconds, full response).
        :rtype: Tuple[dict, dict, int, dict]
        :raises EvaluationException: If the conversation template fails to render.
        """
        # On the first turn, return the conversation starter (when provided)
        # instead of generating a turn with the LLM.
        if turn_number == 0 and self.conversation_starter is not None:
            # Fix: the original used two independent `if` statements plus an `else`,
            # so the dict case was redundantly re-assigned by the `else` branch.
            # An if/elif/else chain makes the three cases explicit.
            if isinstance(self.conversation_starter, dict):
                # A dict starter is passed through into samples as-is.
                samples = [self.conversation_starter]
            elif isinstance(self.conversation_starter, jinja2.Template):
                samples = [self.conversation_starter.render(**self.persona_template_args)]
            else:
                samples = [self.conversation_starter]
            time_taken = 0
            finish_reason = ["stop"]
            parsed_response = {"samples": samples, "finish_reason": finish_reason, "id": None}
            full_response = parsed_response
            return parsed_response, {}, time_taken, full_response

        try:
            prompt = self.conversation_template.render(
                conversation_turns=conversation_history[-max_history:],
                role=self.role.value,
                **self.persona_template_args,
            )
        except Exception as exc:  # pylint: disable=broad-except
            # Fix: the original dropped into an interactive debugger here
            # (`import code; code.interact(local=locals())`), which hangs
            # non-interactive runs and left `prompt` undefined afterwards.
            # Surface a proper error instead.
            msg = "Failed to render the conversation template."
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.CONVERSATION,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            ) from exc

        messages = [{"role": "system", "content": prompt}]

        # The ChatAPI must respond as ASSISTANT, so if this bot is USER, we need to reverse the messages
        if (self.role == ConversationRole.USER) and (isinstance(self.model, (OpenAIChatCompletionsModel))):
            # The chat API can only generate turns as "assistant"; to simulate the
            # user we reverse all roles in the history so that messages produced by
            # the other bot are passed here as user messages.
            messages.extend([turn.to_openai_chat_format(reverse=True) for turn in conversation_history[-max_history:]])
            prompt_role = ConversationRole.USER.value
        else:
            messages.extend([turn.to_openai_chat_format() for turn in conversation_history[-max_history:]])
            prompt_role = self.role.value

        response = await self.model.get_conversation_completion(
            messages=messages,
            session=session,
            role=prompt_role,
        )

        return response["response"], response["request"], response["time_taken"], response["full_response"]

    def __repr__(self):
        return f"Bot(name={self.name}, role={self.role.name}, model={self.model.__class__.__name__})"
220
+
221
+
222
class CallbackConversationBot(ConversationBot):
    """Conversation bot that uses a user provided callback to generate responses.

    :param callback: The callback function to use to generate responses.
    :type callback: Callable
    :param user_template: The template to use for the request.
    :type user_template: str
    :param user_template_parameters: The template parameters to use for the request.
    :type user_template_parameters: Dict
    :param args: Optional arguments to pass to the parent class.
    :type args: Any
    :param kwargs: Optional keyword arguments to pass to the parent class.
    :type kwargs: Any
    """

    def __init__(
        self,
        callback: Callable,
        user_template: str,
        user_template_parameters: Dict,
        *args,
        **kwargs,
    ) -> None:
        self.callback = callback
        self.user_template = user_template
        self.user_template_parameters = user_template_parameters

        super().__init__(*args, **kwargs)

    async def generate_response(
        self,
        session: AsyncHttpPipeline,
        conversation_history: List[Any],
        max_history: int,
        turn_number: int = 0,
    ) -> Tuple[dict, dict, int, dict]:
        """Generate a response by invoking the user-provided callback.

        :param session: AsyncHttpPipeline for interface parity with the parent class
            (the callback itself makes any network calls).
        :type session: AsyncHttpPipeline
        :param conversation_history: The turns in the conversation so far.
        :type conversation_history: List[Any]
        :param max_history: Unused by this implementation; kept for interface parity.
        :type max_history: int
        :param turn_number: The current turn number. Defaults to 0.
        :type turn_number: int
        :return: Tuple of (parsed response, request, time taken in seconds, full callback result).
        :rtype: Tuple[dict, dict, int, dict]
        :raises EvaluationException: If the callback result does not follow the chat protocol.
        """
        chat_protocol_message = self._to_chat_protocol(
            self.user_template, conversation_history, self.user_template_parameters
        )
        # Deep-copy so the callback cannot mutate our protocol message in place.
        msg_copy = copy.deepcopy(chat_protocol_message)
        # Fix: removed the dead `result = {}` assignment that was immediately
        # overwritten by the callback's return value.
        start_time = time.time()
        result = await self.callback(msg_copy)
        end_time = time.time()
        if not result:
            # Substitute a canned chat-protocol response when the callback
            # returns nothing, so downstream parsing still succeeds.
            result = {
                "messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
                "finish_reason": ["stop"],
                "id": None,
                "template_parameters": {},
            }
            self.logger.info("Using user provided callback returning response.")

        time_taken = end_time - start_time
        try:
            response = {
                "samples": [result["messages"][-1]["content"]],
                "finish_reason": ["stop"],
                "id": None,
            }
        except (KeyError, IndexError, TypeError) as exc:
            # Narrowed from a blanket `except Exception`: these are the failure
            # modes of the lookups above when the callback result is malformed.
            msg = "User provided callback does not conform to chat protocol standard."
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            ) from exc

        self.logger.info("Parsed callback response")

        return response, {}, time_taken, result

    # Bug 3354264: template is unused in the method - is this intentional?
    def _to_chat_protocol(self, template, conversation_history, template_parameters):  # pylint: disable=unused-argument
        """Convert the conversation history into the chat-protocol wire format."""
        messages = []

        # Fix: dropped the pointless `enumerate` whose index was never used.
        for m in conversation_history:
            messages.append({"content": m.message, "role": m.role.value})

        return {
            "template_parameters": template_parameters,
            "messages": messages,
            "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
        }
308
+
309
+
310
# Explicit public API of this conversation subpackage.
__all__ = [
    "ConversationRole",
    "ConversationBot",
    "CallbackConversationBot",
    "ConversationTurn",
]
@@ -0,0 +1,178 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import Callable, Dict, List, Tuple, Union
8
+
9
+ from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
10
+ from azure.ai.evaluation.simulator._constants import SupportedLanguages
11
+
12
+ from ..._http_utils import AsyncHttpPipeline
13
+ from . import ConversationBot, ConversationTurn
14
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
15
+
16
+
17
def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
    """Determine if a response indicates an end to the conversation.

    Recursively inspects every value of nested dictionaries; string values are
    checked for closing phrases by :func:`is_closing_message_helper`.

    :param response: The response to check.
    :type response: Union[Dict, str]
    :param recursion_depth: The current recursion depth. Defaults to 0.
    :type recursion_depth: int
    :return: True if the response indicates an end to the conversation, False otherwise.
    :rtype: bool
    :raises EvaluationException: If the nesting depth exceeds 10.
    """
    if recursion_depth > 10:
        msg = "Exceeded max call depth in is_closing_message"
        # Fix: keyword names now match the EvaluationException constructor as used
        # elsewhere in this package (category/target/blame, not error_category/...),
        # which would have raised a TypeError when this branch was hit.
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            category=ErrorCategory.INVALID_VALUE,
            target=ErrorTarget.CONVERSATION,
            blame=ErrorBlame.USER_ERROR,
        )

    # Recursively go through each inner dictionary in the JSON dict
    # and check if any value entry contains a closing message.
    if isinstance(response, dict):
        return any(
            is_closing_message(value, recursion_depth=recursion_depth + 1) for value in response.values()
        )
    if isinstance(response, str):
        return is_closing_message_helper(response)

    # Anything that is neither a dict nor a string cannot close the conversation.
    return False


def is_closing_message_helper(response: str) -> bool:
    """Determine if a single string response indicates an end to the conversation.

    :param response: The response to check.
    :type response: str
    :return: True if the response indicates an end to the conversation, False otherwise.
    :rtype: bool
    """
    message = response.lower()
    # A question implies the conversation is still in progress.
    if "?" in message:
        return False
    # Strip punctuation so "bye!" and "goodbye." are still recognized.
    for mark in (".", ",", "!", ";", ":"):
        message = message.replace(mark, "")
    words = message.split()
    return "bye" in words or "goodbye" in words
71
+
72
+
73
async def simulate_conversation(
    *,
    bots: List[ConversationBot],
    session: AsyncHttpPipeline,
    language: SupportedLanguages,
    stopping_criteria: Callable[[str], bool] = is_closing_message,
    turn_limit: int = 10,
    history_limit: int = 5,
    api_call_delay_sec: float = 0,
    logger: logging.Logger = logging.getLogger(__name__),
) -> Tuple:
    """
    Simulate a conversation between the given bots.

    :param bots: List of ConversationBot instances participating in the conversation.
    :type bots: List[ConversationBot]
    :param session: The session to use for making API calls.
    :type session: AsyncHttpPipeline
    :param language: The language in which the conversation should be generated.
    :type language: ~azure.ai.evaluation.simulator._constants.SupportedLanguages
    :param stopping_criteria: A callable that determines when the conversation should stop.
    :type stopping_criteria: Callable[[str], bool]
    :param turn_limit: The maximum number of turns in the conversation. Defaults to 10.
    :type turn_limit: int
    :param history_limit: The maximum number of turns to keep in the conversation history. Defaults to 5.
    :type history_limit: int
    :param api_call_delay_sec: Delay between API calls in seconds. Defaults to 0.
    :type api_call_delay_sec: float
    :param logger: The logger to use for logging. Defaults to the logger named after the current module.
    :type logger: logging.Logger
    :return: The conversation id and the list of conversation turns.
    :rtype: Tuple
    """

    # Read the first prompt.
    (first_response, request, _, full_response) = await bots[0].generate_response(
        session=session,
        conversation_history=[],
        max_history=history_limit,
        turn_number=0,
    )
    # A conversation starter carries no id; it may be filled in from a later turn.
    conversation_id = first_response.get("id")
    first_prompt = first_response["samples"][0]
    if language != SupportedLanguages.English:
        # Fix: any SupportedLanguages instance is by definition a member of the
        # enum, so the isinstance check alone suffices (the original additionally
        # tested `language not in SupportedLanguages`, which was redundant).
        if not isinstance(language, SupportedLanguages):
            raise Exception(  # pylint: disable=broad-exception-raised
                f"Language option '{language}' isn't supported. Select a supported language option from "
                f"azure.ai.evaluation.simulator.SupportedLanguages: {[f'{e}' for e in SupportedLanguages]}"
            )
        first_prompt += f" {SUPPORTED_LANGUAGES_MAPPING[language]}"
    # Add all generated turns into array to pass for each bot while generating
    # new responses. We add generated response and the person generating it.
    # In the case of the first turn, it is supposed to be the user search query.
    conversation_history = [
        ConversationTurn(
            role=bots[0].role,
            name=bots[0].name,
            message=first_prompt,
            full_response=full_response,
            request=request,
        )
    ]

    # initialize the turn counter
    current_turn = 1

    # Keep iterating and alternate between bots until a stopping word is
    # generated or maximum number of turns is reached.
    while (not stopping_criteria(conversation_history[-1].message)) and (current_turn < turn_limit):
        try:
            current_character_idx = current_turn % len(bots)
            current_bot = bots[current_character_idx]
            # Invoke the bot to generate a response given the conversation so far.
            response, request, _, full_response = await current_bot.generate_response(
                session=session,
                conversation_history=conversation_history,
                max_history=history_limit,
                turn_number=current_turn,
            )

            # If conversation id is still unset (a conversation starter was used
            # for turn 0), adopt the id from this turn.
            if conversation_id is None and "id" in response:
                conversation_id = response["id"]
            # Add the generated response to the list of generated responses.
            conversation_history.append(
                ConversationTurn(
                    role=current_bot.role,
                    name=current_bot.name,
                    message=response["samples"][0],
                    full_response=full_response,
                    request=request,
                )
            )
        except Exception as e:  # pylint: disable=broad-except
            # Best-effort: log and continue so one failed turn does not abort the run.
            logger.warning("Error: %s", str(e))

        # Increment outside the try block so we don't get stuck if
        # an exception is thrown
        current_turn += 1

        # Sleep between consecutive requests to avoid rate limit
        await asyncio.sleep(api_call_delay_sec)

    return conversation_id, conversation_history
@@ -0,0 +1,30 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from enum import Enum
6
+
7
# Default bot-pair names used by the conversation simulators.
BOT_NAMES = ["chat_bot", "other_bot"]
TASK_BOT_NAMES = ["system_bot", "simulated_bot"]

REQUESTS_BATCH_SIZE = 200  # Number of input lines to process at once, must fit into memory
OUTPUT_FILE = "openai_api_response.jsonl"

# Azure endpoint constants
AZUREML_TOKEN_SCOPE = "https://ml.azure.com"  # AAD token scope for Azure ML endpoints
COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"  # AAD token scope for Cognitive Services
AZURE_TOKEN_REFRESH_INTERVAL = 600  # seconds
# Validates hostnames ending in .inference.ml.azure.com or .openai.azure.com:
# total length at most 255 chars, each dot-separated label 1-63 alphanumeric/
# hyphen chars that neither starts nor ends with a hyphen.
AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
    r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
    r"(\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*\."
    r"(inference\.ml|openai)\.azure\.com$"
)
# ChatML-style delimiters marking the start and end of a chat message.
CHAT_START_TOKEN = "<|im_start|>"
CHAT_END_TOKEN = "<|im_end|>"
24
+
25
+
26
class ConversationRole(Enum):
    """Role in a chatbot conversation"""

    # Values match the "role" field of the OpenAI chat message format.
    USER = "user"
    ASSISTANT = "assistant"