azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. azure/ai/evaluation/__init__.py +82 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/_experimental.py +172 -0
  4. azure/ai/evaluation/_common/constants.py +72 -0
  5. azure/ai/evaluation/_common/math.py +89 -0
  6. azure/ai/evaluation/_common/rai_service.py +632 -0
  7. azure/ai/evaluation/_common/utils.py +445 -0
  8. azure/ai/evaluation/_constants.py +72 -0
  9. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
  12. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
  13. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
  14. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  15. azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
  16. azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
  17. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
  18. azure/ai/evaluation/_evaluate/_utils.py +298 -0
  19. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  20. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  21. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
  22. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  23. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
  24. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
  25. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  26. azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
  27. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
  28. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  47. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  48. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  49. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
  50. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  51. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  52. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  53. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  54. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  55. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  56. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  57. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  58. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  59. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
  60. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
  62. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
  64. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
  65. azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
  66. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
  67. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  68. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  69. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  70. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  72. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  73. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
  74. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
  75. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  76. azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
  77. azure/ai/evaluation/_exceptions.py +128 -0
  78. azure/ai/evaluation/_http_utils.py +466 -0
  79. azure/ai/evaluation/_model_configurations.py +123 -0
  80. azure/ai/evaluation/_user_agent.py +6 -0
  81. azure/ai/evaluation/_vendor/__init__.py +3 -0
  82. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  83. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  84. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  85. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  86. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  87. azure/ai/evaluation/_version.py +5 -0
  88. azure/ai/evaluation/py.typed +0 -0
  89. azure/ai/evaluation/simulator/__init__.py +16 -0
  90. azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
  91. azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
  92. azure/ai/evaluation/simulator/_constants.py +27 -0
  93. azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
  94. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  95. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  96. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  97. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  98. azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
  99. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  100. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  101. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
  102. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
  103. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  104. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
  105. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
  106. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
  107. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
  108. azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
  109. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  110. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
  111. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
  112. azure/ai/evaluation/simulator/_simulator.py +716 -0
  113. azure/ai/evaluation/simulator/_tracing.py +89 -0
  114. azure/ai/evaluation/simulator/_utils.py +132 -0
  115. azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
  116. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
  117. azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
  118. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
  119. azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
  120. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  121. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  122. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,316 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # noqa: E402
5
+
6
+ import copy
7
+ import logging
8
+ import time
9
+ from dataclasses import dataclass
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
11
+
12
+ import jinja2
13
+
14
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
15
+ from azure.ai.evaluation._http_utils import AsyncHttpPipeline
16
+
17
+ from .._model_tools import LLMBase, OpenAIChatCompletionsModel
18
+ from .._model_tools._template_handler import TemplateParameters
19
+ from .constants import ConversationRole
20
+
21
+
22
@dataclass
class ConversationTurn:
    """A single message contributed by one participant of a conversation.

    A "turn" involves only one exchange between the user and the chatbot.

    :param role: The role of the participant that produced the message. Accepted values are
        "user" and "assistant".
    :type role: ~azure.ai.evaluation.simulator._conversation.constants.ConversationRole
    :param name: The display name of the participant. When omitted, the role value is used
        wherever an actor name is needed.
    :type name: Optional[str]
    :param message: The text of the message. Defaults to an empty string.
    :type message: str
    :param full_response: The full response the message was extracted from.
    :type full_response: Optional[Dict[str, Any]]
    :param request: The request that produced the message.
    :type request: Optional[Any]
    """

    role: "ConversationRole"
    name: Optional[str] = None
    message: str = ""
    full_response: Optional[Dict[str, Any]] = None
    request: Optional[Any] = None

    def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
        """Render the turn as an OpenAI chat message.

        The OpenAI chat format is a dictionary with two keys: "role" and "content".

        :param reverse: When anything other than ``False``, the role is swapped
            (assistant becomes user, everything else becomes assistant). Defaults to False.
        :type reverse: bool
        :return: The conversation turn in the OpenAI chat format.
        :rtype: Dict[str, str]
        """
        # The identity comparison is deliberate: only the literal ``False`` keeps the original role.
        if reverse is False:
            role_value = self.role.value
        elif self.role == ConversationRole.ASSISTANT:
            role_value = ConversationRole.USER.value
        else:
            role_value = ConversationRole.ASSISTANT.value
        return {"role": role_value, "content": self.message}

    def to_annotation_format(self, turn_number: int) -> Dict[str, Any]:
        """Render the turn as an annotation record.

        The record contains the keys "turn_number", "response", "actor", "request" and
        "full_json_response". The actor is the participant name when one is set and the
        role value otherwise.

        :param turn_number: The position of this turn in the conversation.
        :type turn_number: int
        :return: The conversation turn in the annotation format.
        :rtype: Dict[str, Any]
        """
        actor = self.name if self.name is not None else self.role.value
        annotation: Dict[str, Any] = {}
        annotation["turn_number"] = turn_number
        annotation["response"] = self.message
        annotation["actor"] = actor
        annotation["request"] = self.request
        annotation["full_json_response"] = self.full_response
        return annotation

    def __str__(self) -> str:
        return f"({self.role.value}): {self.message}"
88
+
89
+
90
class ConversationBot:
    """
    A conversation chat bot with a specific name, persona and a sentence that can be used as a conversation starter.

    :param role: The role of the bot in the conversation, either "user" or "assistant".
    :type role: ~azure.ai.evaluation.simulator._conversation.constants.ConversationRole
    :param model: The LLM model to use for generating responses.
    :type model: Union[
        ~azure.ai.evaluation.simulator._model_tools.LLMBase,
        ~azure.ai.evaluation.simulator._model_tools.OpenAIChatCompletionsModel
    ]
    :param conversation_template: A Jinja2 template describing the conversation to generate the prompt for the LLM
    :type conversation_template: str
    :param instantiation_parameters: A dictionary of parameters used to instantiate the conversation template
    :type instantiation_parameters: Dict[str, str]
    """

    def __init__(
        self,
        *,
        role: ConversationRole,
        model: Union[LLMBase, OpenAIChatCompletionsModel],
        conversation_template: str,
        instantiation_parameters: TemplateParameters,
    ) -> None:
        self.role = role
        self.conversation_template_orig = conversation_template
        # StrictUndefined makes rendering fail loudly when a template variable is missing.
        self.conversation_template: jinja2.Template = jinja2.Template(
            conversation_template, undefined=jinja2.StrictUndefined
        )
        self.persona_template_args = instantiation_parameters
        if self.role == ConversationRole.USER:
            self.name: str = cast(str, self.persona_template_args.get("name", role.value))
        else:
            # An empty/falsy "chatbot_name" falls back to the model's own name.
            self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
        self.model = model

        self.logger = logging.getLogger(repr(self))
        self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
        if role == ConversationRole.USER:
            if "conversation_starter" in self.persona_template_args:
                conversation_starter_content = self.persona_template_args["conversation_starter"]
                if isinstance(conversation_starter_content, dict):
                    # Dictionaries are forwarded untouched as the first turn.
                    self.conversation_starter = conversation_starter_content
                else:
                    try:
                        self.conversation_starter = jinja2.Template(
                            conversation_starter_content, undefined=jinja2.StrictUndefined
                        )
                    except jinja2.exceptions.TemplateSyntaxError:
                        # Not valid Jinja2 syntax: use the raw content verbatim instead.
                        self.conversation_starter = conversation_starter_content
            else:
                self.logger.info(
                    "This simulated bot will generate the first turn as no conversation starter is provided"
                )

    async def generate_response(
        self,
        session: AsyncHttpPipeline,
        conversation_history: List[ConversationTurn],
        max_history: int,
        turn_number: int = 0,
    ) -> Tuple[dict, dict, float, dict]:
        """
        Prompt the ConversationBot for a response.

        On turn 0, when a conversation starter is configured, the starter is returned directly
        and the model is not called.

        :param session: AsyncHttpPipeline to use for the request.
        :type session: AsyncHttpPipeline
        :param conversation_history: The turns in the conversation so far.
        :type conversation_history: List[ConversationTurn]
        :param max_history: The number of most recent turns handed to the template and the model.
            The value is used as ``conversation_history[-max_history:]``, so ``0`` selects the
            whole history.
        :type max_history: int
        :param turn_number: The index of the turn being generated. Defaults to 0.
        :type turn_number: int
        :return: A tuple of (parsed response, request, time taken, full response).
        :rtype: Tuple[dict, dict, float, dict]
        :raises ~azure.ai.evaluation._exceptions.EvaluationException: If the conversation template
            cannot be rendered.
        """

        # check if this is the first turn and the conversation_starter is not None,
        # return the conversations starter rather than generating turn using LLM
        if turn_number == 0 and self.conversation_starter is not None:
            starter = self.conversation_starter
            if isinstance(starter, jinja2.Template):
                samples: List[Union[str, jinja2.Template, Dict]] = [starter.render(**self.persona_template_args)]
            else:
                # Dictionaries and plain strings are passed into samples as is.
                samples = [starter]

            parsed_response = {"samples": samples, "finish_reason": ["stop"], "id": None}
            # No request was sent, so the request is empty, the elapsed time is zero and the
            # parsed response doubles as the full response.
            return parsed_response, {}, 0, parsed_response

        recent_turns = conversation_history[-max_history:]

        try:
            prompt = self.conversation_template.render(
                conversation_turns=recent_turns,
                role=self.role.value,
                **self.persona_template_args,
            )
        except Exception as exc:  # pylint: disable=broad-except
            # Surface rendering problems to the caller instead of dropping into an
            # interactive debugging console, which would hang non-interactive runs.
            msg = f"Failed to render the conversation template for {self!r}: {exc}"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.CONVERSATION,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            ) from exc

        messages = [{"role": "system", "content": prompt}]

        # The ChatAPI must respond as ASSISTANT, so if this bot is USER, we need to reverse the messages:
        # the chat API only generates turns as assistant and can't generate turns as user, thus all roles
        # in the history are swapped so that messages produced by the other bot arrive as user messages.
        reverse_roles = (self.role == ConversationRole.USER) and isinstance(self.model, OpenAIChatCompletionsModel)
        if reverse_roles:
            messages.extend([turn.to_openai_chat_format(reverse=True) for turn in recent_turns])
            prompt_role = ConversationRole.USER.value
        else:
            messages.extend([turn.to_openai_chat_format() for turn in recent_turns])
            prompt_role = self.role.value

        response = await self.model.get_conversation_completion(
            messages=messages,
            session=session,
            role=prompt_role,
        )

        return response["response"], response["request"], response["time_taken"], response["full_response"]

    def __repr__(self):
        return f"Bot(name={self.name}, role={self.role.name}, model={self.model.__class__.__name__})"
221
+
222
+
223
class CallbackConversationBot(ConversationBot):
    """Conversation bot whose responses come from a user supplied callback.

    :param callback: The callback function to use to generate responses. It is awaited with a
        chat-protocol payload.
    :type callback: Callable
    :param user_template: The template to use for the request.
    :type user_template: str
    :param user_template_parameters: The template parameters to use for the request.
    :type user_template_parameters: Dict
    :param args: Optional arguments to pass to the parent class.
    :type args: Any
    :param kwargs: Optional keyword arguments to pass to the parent class.
    :type kwargs: Any
    """

    def __init__(
        self,
        callback: Callable,
        user_template: str,
        user_template_parameters: TemplateParameters,
        *args,
        **kwargs,
    ) -> None:
        self.callback = callback
        self.user_template = user_template
        self.user_template_parameters = user_template_parameters

        super().__init__(*args, **kwargs)

    async def generate_response(
        self,
        session: AsyncHttpPipeline,
        conversation_history: List[Any],
        max_history: int,
        turn_number: int = 0,
    ) -> Tuple[dict, dict, float, dict]:
        """Ask the user callback for the next turn.

        :param session: Unused by this implementation.
        :type session: AsyncHttpPipeline
        :param conversation_history: The turns in the conversation so far.
        :type conversation_history: List[Any]
        :param max_history: Unused by this implementation; the whole history is sent.
        :type max_history: int
        :param turn_number: Unused by this implementation.
        :type turn_number: int
        :return: A tuple of (parsed response, empty request, time taken, raw callback result).
        :rtype: Tuple[dict, dict, float, dict]
        :raises ~azure.ai.evaluation._exceptions.EvaluationException: If the callback result does
            not expose ``result["messages"][-1]["content"]``.
        """
        # The callback receives a deep copy so it cannot mutate the payload built here.
        payload = copy.deepcopy(
            self._to_chat_protocol(self.user_template, conversation_history, self.user_template_parameters)
        )

        started_at = time.time()
        result = await self.callback(payload)
        finished_at = time.time()

        if not result:
            # A falsy result is replaced with a placeholder assistant message.
            result = {
                "messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
                "finish_reason": ["stop"],
                "id": None,
                "template_parameters": {},
            }
        self.logger.info("Using user provided callback returning response.")

        time_taken = finished_at - started_at
        try:
            last_content = result["messages"][-1]["content"]
        except Exception as exc:
            msg = "User provided callback does not conform to chat protocol standard."
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            ) from exc
        response = {"samples": [last_content], "finish_reason": ["stop"], "id": None}

        self.logger.info("Parsed callback response")

        return response, {}, time_taken, result

    # Bug 3354264: template is unused in the method - is this intentional?
    def _to_chat_protocol(self, template, conversation_history, template_parameters):  # pylint: disable=unused-argument
        """Build the chat-protocol payload sent to the callback from the conversation history."""
        return {
            "template_parameters": template_parameters,
            "messages": [{"content": turn.message, "role": turn.role.value} for turn in conversation_history],
            "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
        }
309
+
310
+
311
# Names exported by ``from ... import *`` for this package.
__all__ = ["ConversationRole", "ConversationBot", "CallbackConversationBot", "ConversationTurn"]
@@ -0,0 +1,178 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import Callable, Dict, List, Optional, Tuple, Union
8
+
9
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
10
+ from azure.ai.evaluation.simulator._constants import SupportedLanguages
11
+ from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
12
+
13
+ from ..._http_utils import AsyncHttpPipeline
14
+ from . import ConversationBot, ConversationTurn
15
+
16
+
17
def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
    """Determine if a response indicates an end to the conversation.

    Strings are checked directly. Dictionaries are searched recursively: the response counts as
    closing as soon as any (nested) value is a closing string. Values of any other type are
    never treated as closing.

    :param response: The response to check.
    :type response: Union[Dict, str]
    :param recursion_depth: The current recursion depth. Defaults to 0.
    :type recursion_depth: int
    :return: True if the response indicates an end to the conversation, False otherwise.
    :rtype: bool
    :raises ~azure.ai.evaluation._exceptions.EvaluationException: If dictionaries are nested more
        than 10 levels deep.
    """
    if recursion_depth > 10:
        msg = "Exceeded max call depth in is_closing_message"
        # Use the same keyword names (target/category/blame) as the other EvaluationException
        # call sites in this package so the error classification is actually recorded.
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            target=ErrorTarget.CONVERSATION,
            category=ErrorCategory.INVALID_VALUE,
            blame=ErrorBlame.USER_ERROR,
        )

    if isinstance(response, str):
        return is_closing_message_helper(response)

    if isinstance(response, dict):
        # recursively go through each inner dictionary in the JSON dict
        # and check if any value entry contains a closing message
        return any(is_closing_message(value, recursion_depth=recursion_depth + 1) for value in response.values())

    return False
47
+
48
+
49
+ def is_closing_message_helper(response: str) -> bool:
50
+ """Determine if a response indicates an end to the conversation.
51
+
52
+ :param response: The response to check.
53
+ :type response: str
54
+ :return: True if the response indicates an end to the conversation, False otherwise.
55
+ :rtype: bool
56
+ """
57
+ message = response.lower()
58
+ if "?" in message.lower():
59
+ return False
60
+ punctuation = [".", ",", "!", ";", ":"]
61
+ for p in punctuation:
62
+ message = message.replace(p, "")
63
+ if (
64
+ "bye" not in message.lower().split()
65
+ and "goodbye" not in message.lower().split()
66
+ # and "thanks" not in message.lower()
67
+ # and "thank" not in message.lower()
68
+ ):
69
+ return False
70
+ return True
71
+
72
+
73
async def simulate_conversation(
    *,
    bots: List[ConversationBot],
    session: AsyncHttpPipeline,
    language: SupportedLanguages,
    stopping_criteria: Callable[[str], bool] = is_closing_message,
    turn_limit: int = 10,
    history_limit: int = 5,
    api_call_delay_sec: float = 0,
    logger: logging.Logger = logging.getLogger(__name__),
) -> Tuple[Optional[str], List[ConversationTurn]]:
    """
    Simulate a conversation between the given bots.

    The first bot produces the opening turn; afterwards the bots take turns in list order until
    the stopping criteria matches the latest message or the turn limit is reached. A failure
    while generating a later turn is logged as a warning and that turn is skipped.

    :keyword bots: List of ConversationBot instances participating in the conversation.
    :paramtype bots: List[ConversationBot]
    :keyword session: The session to use for making API calls.
    :paramtype session: AsyncHttpPipeline
    :keyword language: The language of the conversation. For any language other than English the
        matching suffix from ``SUPPORTED_LANGUAGES_MAPPING`` is appended to the first prompt.
    :paramtype language: ~azure.ai.evaluation.simulator.SupportedLanguages
    :keyword stopping_criteria: A callable that receives the latest message and determines when the
        conversation should stop.
    :paramtype stopping_criteria: Callable[[str], bool]
    :keyword turn_limit: The maximum number of turns in the conversation. Defaults to 10.
    :paramtype turn_limit: int
    :keyword history_limit: The maximum number of turns to keep in the conversation history. Defaults to 5.
    :paramtype history_limit: int
    :keyword api_call_delay_sec: Delay between API calls in seconds. Defaults to 0.
    :paramtype api_call_delay_sec: float
    :keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
    :paramtype logger: logging.Logger
    :return: The conversation id (if any turn reported one) and the list of generated turns.
    :rtype: Tuple[Optional[str], List[ConversationTurn]]
    :raises Exception: If ``language`` is not a supported language option.
    """

    # Reject an unsupported language before any request is sent, so that invalid input does not
    # cost an API call. Every instance of the enum is a member, so the isinstance check suffices.
    needs_language_suffix = language != SupportedLanguages.English
    if needs_language_suffix and not isinstance(language, SupportedLanguages):
        raise Exception(  # pylint: disable=broad-exception-raised
            f"Language option '{language}' isn't supported. Select a supported language option from "
            f"azure.ai.evaluation.simulator.SupportedLanguages: {[f'{e}' for e in SupportedLanguages]}"
        )

    # Read the first prompt.
    (first_response, request, _, full_response) = await bots[0].generate_response(
        session=session,
        conversation_history=[],
        max_history=history_limit,
        turn_number=0,
    )
    conversation_id: Optional[str] = first_response["id"] if "id" in first_response else None
    first_prompt = first_response["samples"][0]
    if needs_language_suffix:
        first_prompt += f" {SUPPORTED_LANGUAGES_MAPPING[language]}"

    # Add all generated turns into array to pass for each bot while generating
    # new responses. We add generated response and the person generating it.
    # in the case of the first turn, it is supposed to be the user search query
    conversation_history = [
        ConversationTurn(
            role=bots[0].role,
            name=bots[0].name,
            message=first_prompt,
            full_response=full_response,
            request=request,
        )
    ]

    # initialize the turn counter
    current_turn = 1

    # Keep iterating and alternate between bots until a stopping word is
    # generated or maximum number of turns is reached.
    while (not stopping_criteria(conversation_history[-1].message)) and (current_turn < turn_limit):
        try:
            current_bot = bots[current_turn % len(bots)]
            # invoke Bot to generate response given the input request
            response, request, _, full_response = await current_bot.generate_response(
                session=session,
                conversation_history=conversation_history,
                max_history=history_limit,
                turn_number=current_turn,
            )

            # check if conversation id is null, which means conversation starter was used. use id from next turn
            if conversation_id is None and "id" in response:
                conversation_id = response["id"]
            # add the generated response to the list of generated responses
            conversation_history.append(
                ConversationTurn(
                    role=current_bot.role,
                    name=current_bot.name,
                    message=response["samples"][0],
                    full_response=full_response,
                    request=request,
                )
            )
        except Exception as e:  # pylint: disable=broad-except
            # Best effort: a failed turn is reported and skipped rather than aborting the simulation.
            logger.warning("Error: %s", str(e))

        # Increment outside the try block so we don't get stuck if
        # an exception is thrown
        current_turn += 1

        # Sleep between consecutive requests to avoid rate limit
        await asyncio.sleep(api_call_delay_sec)

    return conversation_id, conversation_history
@@ -0,0 +1,30 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from enum import Enum
6
+
7
BOT_NAMES = ["chat_bot", "other_bot"]
TASK_BOT_NAMES = ["system_bot", "simulated_bot"]

# Number of input lines to process at once, must fit into memory
REQUESTS_BATCH_SIZE = 200
OUTPUT_FILE = "openai_api_response.jsonl"

# Azure endpoint constants
AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"
AZURE_TOKEN_REFRESH_INTERVAL = 600  # seconds

# Host-name pattern, assembled from its parts:
AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = "".join(
    (
        r"^(?=.{1,255}$)",  # the whole name is 1 to 255 characters long
        r"(?!-)[a-zA-Z0-9-]{1,63}(?<!-)",  # first label: 1-63 chars, no leading/trailing hyphen
        r"(\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*",  # any number of further dot-separated labels
        r"\.(inference\.ml|openai)\.azure\.com$",  # required domain suffix
    )
)

CHAT_START_TOKEN = "<|im_start|>"
CHAT_END_TOKEN = "<|im_end|>"
24
+
25
+
26
class ConversationRole(Enum):
    """The participant kinds of a chatbot conversation.

    Each member's value is the role string placed in chat messages.
    """

    USER = "user"
    ASSISTANT = "assistant"
@@ -0,0 +1,3 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------