azure-ai-evaluation: azure_ai_evaluation-1.0.1-py3-none-any.whl → azure_ai_evaluation-1.13.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. See the package's registry page for more details.

Files changed (277)
  1. azure/ai/evaluation/__init__.py +85 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +147 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +87 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
  155. azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
  156. azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
  157. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  158. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  159. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  160. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  161. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  162. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  163. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  165. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
  264. azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
@@ -6,30 +6,38 @@
6
6
  import asyncio
7
7
  import logging
8
8
  import random
9
- from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
10
- from itertools import zip_longest
9
+ from typing import Any, Callable, Dict, List, Optional, Union, cast
10
+ import uuid
11
+ import warnings
11
12
 
12
13
  from tqdm import tqdm
13
14
 
14
15
  from azure.ai.evaluation._common._experimental import experimental
15
- from azure.ai.evaluation._common.utils import validate_azure_ai_project
16
+ from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
17
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
16
18
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
19
  from azure.ai.evaluation._http_utils import get_async_http_client
18
20
  from azure.ai.evaluation._model_configurations import AzureAIProject
19
- from azure.ai.evaluation.simulator import AdversarialScenario
21
+ from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
20
22
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
23
+ from azure.ai.evaluation._constants import TokenScope
21
24
  from azure.core.credentials import TokenCredential
22
25
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
23
26
 
24
27
  from ._constants import SupportedLanguages
25
- from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
28
+ from ._conversation import (
29
+ CallbackConversationBot,
30
+ MultiModalConversationBot,
31
+ ConversationBot,
32
+ ConversationRole,
33
+ ConversationTurn,
34
+ )
26
35
  from ._conversation._conversation import simulate_conversation
27
36
  from ._model_tools import (
28
37
  AdversarialTemplateHandler,
29
38
  ManagedIdentityAPITokenManager,
30
39
  ProxyChatCompletionsModel,
31
40
  RAIClient,
32
- TokenScope,
33
41
  )
34
42
  from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
35
43
  from ._utils import JsonLineList
@@ -42,9 +50,9 @@ class AdversarialSimulator:
42
50
  """
43
51
  Initializes the adversarial simulator with a project scope.
44
52
 
45
- :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
46
- name.
47
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
53
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
54
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
55
+ :type azure_ai_project: Union[str, AzureAIProject]
48
56
  :param credential: The credential for connecting to Azure AI project.
49
57
  :type credential: ~azure.core.credentials.TokenCredential
50
58
 
@@ -59,26 +67,45 @@ class AdversarialSimulator:
59
67
  2 conversation turns each (4 messages per result).
60
68
  """
61
69
 
62
- def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
70
+ def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
63
71
  """Constructor."""
64
-
65
- try:
66
- self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
67
- except EvaluationException as e:
68
- raise EvaluationException(
69
- message=e.message,
70
- internal_message=e.internal_message,
71
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
72
- category=e.category,
73
- blame=e.blame,
74
- ) from e
75
-
76
- self.token_manager = ManagedIdentityAPITokenManager(
77
- token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
78
- logger=logging.getLogger("AdversarialSimulator"),
79
- credential=cast(TokenCredential, credential),
72
+ warnings.warn(
73
+ "DEPRECATION NOTE: Azure AI Evaluation SDK has discontinued active development on the AdversarialSimulator class."
74
+ + " While existing functionality remains available in preview, it is no longer recommended for production workloads or future integration. "
75
+ + "We recommend users migrate to the AI Red Teaming Agent for future use as it supports full parity of functionality."
76
+ + " See https://aka.ms/airedteamingagent-sample for details on AI Red Teaming Agent.",
77
+ DeprecationWarning,
78
+ stacklevel=2,
80
79
  )
81
- self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
80
+
81
+ if is_onedp_project(azure_ai_project):
82
+ self.azure_ai_project = azure_ai_project
83
+ self.credential = cast(TokenCredential, credential)
84
+ self.token_manager = ManagedIdentityAPITokenManager(
85
+ token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
86
+ logger=logging.getLogger("AdversarialSimulator"),
87
+ credential=self.credential,
88
+ )
89
+ self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
90
+ else:
91
+ try:
92
+ self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
93
+ except EvaluationException as e:
94
+ raise EvaluationException(
95
+ message=e.message,
96
+ internal_message=e.internal_message,
97
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
98
+ category=e.category,
99
+ blame=e.blame,
100
+ ) from e
101
+ self.credential = cast(TokenCredential, credential)
102
+ self.token_manager = ManagedIdentityAPITokenManager(
103
+ token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
104
+ logger=logging.getLogger("AdversarialSimulator"),
105
+ credential=self.credential,
106
+ )
107
+ self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
108
+
82
109
  self.adversarial_template_handler = AdversarialTemplateHandler(
83
110
  azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
84
111
  )
@@ -182,6 +209,14 @@ class AdversarialSimulator:
182
209
  )
183
210
  self._ensure_service_dependencies()
184
211
  templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
212
+ if len(templates) == 0:
213
+ raise EvaluationException(
214
+ message="Templates not found. Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
215
+ internal_message="Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
216
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
217
+ )
218
+ simulation_id = str(uuid.uuid4())
219
+ logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
185
220
  concurrent_async_task = min(concurrent_async_task, 1000)
186
221
  semaphore = asyncio.Semaphore(concurrent_async_task)
187
222
  sim_results = []
@@ -198,46 +233,83 @@ class AdversarialSimulator:
198
233
  total_tasks = min(total_tasks, max_simulation_results)
199
234
  _jailbreak_type = kwargs.get("_jailbreak_type", None)
200
235
  if _jailbreak_type:
201
- jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
236
+ if isinstance(self.rai_client, RAIClient):
237
+ jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
238
+ elif isinstance(self.rai_client, AIProjectClient):
239
+ jailbreak_dataset = self.rai_client.red_teams.get_jail_break_dataset_with_type(type=_jailbreak_type)
202
240
  progress_bar = tqdm(
203
241
  total=total_tasks,
204
242
  desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
205
243
  ncols=100,
206
244
  unit="simulations",
207
245
  )
208
-
209
246
  if randomize_order:
210
247
  # The template parameter lists are persistent across sim runs within a session,
211
248
  # So randomize a the selection instead of the parameter list directly,
212
249
  # or a potentially large deep copy.
213
250
  if randomization_seed is not None:
214
- random.seed(randomization_seed)
215
- random.shuffle(templates)
216
- parameter_lists = [t.template_parameters for t in templates]
217
- zipped_parameters = list(zip_longest(*parameter_lists))
218
- for param_group in zipped_parameters:
219
- for template, parameter in zip(templates, param_group):
220
- if _jailbreak_type == "upia":
221
- parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
222
- tasks.append(
223
- asyncio.create_task(
224
- self._simulate_async(
225
- target=target,
226
- template=template,
227
- parameters=parameter,
228
- max_conversation_turns=max_conversation_turns,
229
- api_call_retry_limit=api_call_retry_limit,
230
- api_call_retry_sleep_sec=api_call_retry_sleep_sec,
231
- api_call_delay_sec=api_call_delay_sec,
232
- language=language,
233
- semaphore=semaphore,
234
- )
251
+ # Create a local random instance to avoid polluting global state
252
+ local_random = random.Random(randomization_seed)
253
+ local_random.shuffle(templates)
254
+ else:
255
+ random.shuffle(templates)
256
+
257
+ # Prepare task parameters based on scenario - but use a single append call for all scenarios
258
+ tasks = []
259
+ template_parameter_pairs = []
260
+
261
+ if scenario == AdversarialScenario.ADVERSARIAL_CONVERSATION:
262
+ # For ADVERSARIAL_CONVERSATION, flatten the parameters
263
+ for i, template in enumerate(templates):
264
+ if not template.template_parameters:
265
+ continue
266
+ for parameter in template.template_parameters:
267
+ template_parameter_pairs.append((template, parameter))
268
+ else:
269
+ # Use original logic for other scenarios - zip parameters
270
+ parameter_lists = [t.template_parameters for t in templates]
271
+ zipped_parameters = list(zip(*parameter_lists))
272
+
273
+ for param_group in zipped_parameters:
274
+ for template, parameter in zip(templates, param_group):
275
+ template_parameter_pairs.append((template, parameter))
276
+
277
+ # Limit to max_simulation_results if needed
278
+ if len(template_parameter_pairs) > max_simulation_results:
279
+ template_parameter_pairs = template_parameter_pairs[
280
+ :max_simulation_results
281
+ ] # Create a seeded random instance for jailbreak selection if randomization_seed is provided
282
+ jailbreak_random = None
283
+ if _jailbreak_type == "upia" and randomization_seed is not None:
284
+ jailbreak_random = random.Random(randomization_seed)
285
+
286
+ # Single task append loop for all scenarios
287
+ for template, parameter in template_parameter_pairs:
288
+ if _jailbreak_type == "upia":
289
+ if jailbreak_random is not None:
290
+ selected_jailbreak = jailbreak_random.choice(jailbreak_dataset)
291
+ else:
292
+ selected_jailbreak = random.choice(jailbreak_dataset)
293
+ parameter = self._add_jailbreak_parameter(parameter, selected_jailbreak)
294
+
295
+ tasks.append(
296
+ asyncio.create_task(
297
+ self._simulate_async(
298
+ target=target,
299
+ template=template,
300
+ parameters=parameter,
301
+ max_conversation_turns=max_conversation_turns,
302
+ api_call_retry_limit=api_call_retry_limit,
303
+ api_call_retry_sleep_sec=api_call_retry_sleep_sec,
304
+ api_call_delay_sec=api_call_delay_sec,
305
+ language=language,
306
+ semaphore=semaphore,
307
+ scenario=scenario,
308
+ simulation_id=simulation_id,
235
309
  )
236
310
  )
237
- if len(tasks) >= max_simulation_results:
238
- break
239
- if len(tasks) >= max_simulation_results:
240
- break
311
+ )
312
+
241
313
  for task in asyncio.as_completed(tasks):
242
314
  sim_results.append(await task)
243
315
  progress_bar.update(1)
@@ -292,28 +364,43 @@ class AdversarialSimulator:
292
364
  api_call_delay_sec: int,
293
365
  language: SupportedLanguages,
294
366
  semaphore: asyncio.Semaphore,
367
+ scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
368
+ simulation_id: str = "",
295
369
  ) -> List[Dict]:
296
- user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
370
+ user_bot = self._setup_bot(
371
+ role=ConversationRole.USER,
372
+ template=template,
373
+ parameters=parameters,
374
+ scenario=scenario,
375
+ simulation_id=simulation_id,
376
+ )
297
377
  system_bot = self._setup_bot(
298
- target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
378
+ target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
299
379
  )
300
380
  bots = [user_bot, system_bot]
301
- session = get_async_http_client().with_policies(
302
- retry_policy=AsyncRetryPolicy(
303
- retry_total=api_call_retry_limit,
304
- retry_backoff_factor=api_call_retry_sleep_sec,
305
- retry_mode=RetryMode.Fixed,
306
- )
307
- )
308
381
 
309
- async with semaphore, session:
310
- _, conversation_history = await simulate_conversation(
311
- bots=bots,
312
- session=session,
313
- turn_limit=max_conversation_turns,
314
- api_call_delay_sec=api_call_delay_sec,
315
- language=language,
382
+ async def run_simulation(session_obj):
383
+ async with semaphore:
384
+ _, conversation_history = await simulate_conversation(
385
+ bots=bots,
386
+ session=session_obj,
387
+ turn_limit=max_conversation_turns,
388
+ api_call_delay_sec=api_call_delay_sec,
389
+ language=language,
390
+ )
391
+ return conversation_history
392
+
393
+ if isinstance(self.rai_client, AIProjectClient):
394
+ session = self.rai_client
395
+ else:
396
+ session = get_async_http_client().with_policies(
397
+ retry_policy=AsyncRetryPolicy(
398
+ retry_total=api_call_retry_limit,
399
+ retry_backoff_factor=api_call_retry_sleep_sec,
400
+ retry_mode=RetryMode.Fixed,
401
+ )
316
402
  )
403
+ conversation_history = await run_simulation(session)
317
404
 
318
405
  return self._to_chat_protocol(
319
406
  conversation_history=conversation_history,
@@ -321,17 +408,23 @@ class AdversarialSimulator:
321
408
  )
322
409
 
323
410
  def _get_user_proxy_completion_model(
324
- self, template_key: str, template_parameters: TemplateParameters
411
+ self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
325
412
  ) -> ProxyChatCompletionsModel:
413
+ endpoint_url = (
414
+ self.rai_client._config.endpoint + "/redTeams/simulation/chat/completions/submit"
415
+ if isinstance(self.rai_client, AIProjectClient)
416
+ else self.rai_client.simulation_submit_endpoint
417
+ )
326
418
  return ProxyChatCompletionsModel(
327
419
  name="raisvc_proxy_model",
328
420
  template_key=template_key,
329
421
  template_parameters=template_parameters,
330
- endpoint_url=self.rai_client.simulation_submit_endpoint,
422
+ endpoint_url=endpoint_url,
331
423
  token_manager=self.token_manager,
332
424
  api_version="2023-07-01-preview",
333
425
  max_tokens=1200,
334
426
  temperature=0.0,
427
+ simulation_id=simulation_id,
335
428
  )
336
429
 
337
430
  def _setup_bot(
@@ -341,10 +434,14 @@ class AdversarialSimulator:
341
434
  template: AdversarialTemplate,
342
435
  parameters: TemplateParameters,
343
436
  target: Optional[Callable] = None,
437
+ scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
438
+ simulation_id: str = "",
344
439
  ) -> ConversationBot:
345
440
  if role is ConversationRole.USER:
346
441
  model = self._get_user_proxy_completion_model(
347
- template_key=template.template_name, template_parameters=parameters
442
+ template_key=template.template_name,
443
+ template_parameters=parameters,
444
+ simulation_id=simulation_id,
348
445
  )
349
446
  return ConversationBot(
350
447
  role=role,
@@ -372,6 +469,21 @@ class AdversarialSimulator:
372
469
  def __call__(self) -> None:
373
470
  pass
374
471
 
472
+ if scenario in [
473
+ _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_GEN,
474
+ _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_MULTIMODAL,
475
+ ]:
476
+ return MultiModalConversationBot(
477
+ callback=target,
478
+ role=role,
479
+ model=DummyModel(),
480
+ user_template=str(template),
481
+ user_template_parameters=parameters,
482
+ rai_client=self.rai_client,
483
+ conversation_template="",
484
+ instantiation_parameters={},
485
+ )
486
+
375
487
  return CallbackConversationBot(
376
488
  callback=target,
377
489
  role=role,
@@ -391,13 +503,8 @@ class AdversarialSimulator:
391
503
  blame=ErrorBlame.SYSTEM_ERROR,
392
504
  )
393
505
 
394
- def _join_conversation_starter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
395
- key: Literal["conversation_starter"] = "conversation_starter"
396
- if key in parameters.keys():
397
- parameters[key] = f"{to_join} {parameters[key]}"
398
- else:
399
- parameters[key] = to_join
400
-
506
+ def _add_jailbreak_parameter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
507
+ parameters["jailbreak_string"] = to_join
401
508
  return parameters
402
509
 
403
510
  def call_sync(
@@ -25,3 +25,4 @@ class SupportedLanguages(Enum):
25
25
  Portuguese = "pt"
26
26
  Japanese = "ja"
27
27
  English = "en"
28
+ Korean = "ko"
@@ -8,13 +8,14 @@ import logging
8
8
  import time
9
9
  from dataclasses import dataclass
10
10
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
11
-
11
+ import base64
12
+ import re
12
13
  import jinja2
13
14
 
14
15
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
15
16
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
16
-
17
- from .._model_tools import LLMBase, OpenAIChatCompletionsModel
17
+ from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
18
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
18
19
  from .._model_tools._template_handler import TemplateParameters
19
20
  from .constants import ConversationRole
20
21
 
@@ -136,7 +137,7 @@ class ConversationBot:
136
137
  self.conversation_starter = jinja2.Template(
137
138
  conversation_starter_content, undefined=jinja2.StrictUndefined
138
139
  )
139
- except jinja2.exceptions.TemplateSyntaxError: # noqa: F841
140
+ except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
140
141
  self.conversation_starter = conversation_starter_content
141
142
  else:
142
143
  self.logger.info(
@@ -145,10 +146,11 @@ class ConversationBot:
145
146
 
146
147
  async def generate_response(
147
148
  self,
148
- session: AsyncHttpPipeline,
149
+ session: Union[AsyncHttpPipeline, AIProjectClient],
149
150
  conversation_history: List[ConversationTurn],
150
151
  max_history: int,
151
152
  turn_number: int = 0,
153
+ session_state: Optional[Dict[str, Any]] = None,
152
154
  ) -> Tuple[dict, dict, float, dict]:
153
155
  """
154
156
  Prompt the ConversationBot for a response.
@@ -175,6 +177,9 @@ class ConversationBot:
175
177
  samples = [self.conversation_starter.render(**self.persona_template_args)]
176
178
  else:
177
179
  samples = [self.conversation_starter]
180
+ jailbreak_string = self.persona_template_args.get("jailbreak_string", None)
181
+ if jailbreak_string:
182
+ samples = [f"{jailbreak_string} {samples[0]}"]
178
183
  time_taken = 0
179
184
 
180
185
  finish_reason = ["stop"]
@@ -251,10 +256,11 @@ class CallbackConversationBot(ConversationBot):
251
256
 
252
257
  async def generate_response(
253
258
  self,
254
- session: AsyncHttpPipeline,
259
+ session: Union[AsyncHttpPipeline, AIProjectClient],
255
260
  conversation_history: List[Any],
256
261
  max_history: int,
257
262
  turn_number: int = 0,
263
+ session_state: Optional[Dict[str, Any]] = None,
258
264
  ) -> Tuple[dict, dict, float, dict]:
259
265
  chat_protocol_message = self._to_chat_protocol(
260
266
  self.user_template, conversation_history, self.user_template_parameters
@@ -262,7 +268,7 @@ class CallbackConversationBot(ConversationBot):
262
268
  msg_copy = copy.deepcopy(chat_protocol_message)
263
269
  result = {}
264
270
  start_time = time.time()
265
- result = await self.callback(msg_copy)
271
+ result = await self.callback(msg_copy, session_state=session_state)
266
272
  end_time = time.time()
267
273
  if not result:
268
274
  result = {
@@ -271,8 +277,6 @@ class CallbackConversationBot(ConversationBot):
271
277
  "id": None,
272
278
  "template_parameters": {},
273
279
  }
274
- self.logger.info("Using user provided callback returning response.")
275
-
276
280
  time_taken = end_time - start_time
277
281
  try:
278
282
  response = {
@@ -290,8 +294,6 @@ class CallbackConversationBot(ConversationBot):
290
294
  blame=ErrorBlame.USER_ERROR,
291
295
  ) from exc
292
296
 
293
- self.logger.info("Parsed callback response")
294
-
295
297
  return response, {}, time_taken, result
296
298
 
297
299
  # Bug 3354264: template is unused in the method - is this intentional?
@@ -308,9 +310,134 @@ class CallbackConversationBot(ConversationBot):
308
310
  }
309
311
 
310
312
 
313
+ class MultiModalConversationBot(ConversationBot):
314
+ """MultiModal Conversation bot that uses a user provided callback to generate responses.
315
+
316
+ :param callback: The callback function to use to generate responses.
317
+ :type callback: Callable
318
+ :param user_template: The template to use for the request.
319
+ :type user_template: str
320
+ :param user_template_parameters: The template parameters to use for the request.
321
+ :type user_template_parameters: Dict
322
+ :param args: Optional arguments to pass to the parent class.
323
+ :type args: Any
324
+ :param kwargs: Optional keyword arguments to pass to the parent class.
325
+ :type kwargs: Any
326
+ """
327
+
328
+ def __init__(
329
+ self,
330
+ callback: Callable,
331
+ user_template: str,
332
+ user_template_parameters: TemplateParameters,
333
+ rai_client: Union[RAIClient, AIProjectClient],
334
+ *args,
335
+ **kwargs,
336
+ ) -> None:
337
+ self.callback = callback
338
+ self.user_template = user_template
339
+ self.user_template_parameters = user_template_parameters
340
+ self.rai_client = rai_client
341
+
342
+ super().__init__(*args, **kwargs)
343
+
344
+ async def generate_response(
345
+ self,
346
+ session: Union[AsyncHttpPipeline, AIProjectClient],
347
+ conversation_history: List[Any],
348
+ max_history: int,
349
+ turn_number: int = 0,
350
+ session_state: Optional[Dict[str, Any]] = None,
351
+ ) -> Tuple[dict, dict, float, dict]:
352
+ previous_prompt = conversation_history[-1]
353
+ chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
354
+
355
+ # replace prompt with {image.jpg} tags with image content data.
356
+ conversation_history.pop()
357
+ conversation_history.append(
358
+ ConversationTurn(
359
+ role=previous_prompt.role,
360
+ name=previous_prompt.name,
361
+ message=chat_protocol_message["messages"][0]["content"],
362
+ full_response=previous_prompt.full_response,
363
+ request=chat_protocol_message,
364
+ )
365
+ )
366
+ msg_copy = copy.deepcopy(chat_protocol_message)
367
+ result = {}
368
+ start_time = time.time()
369
+ result = await self.callback(msg_copy)
370
+ end_time = time.time()
371
+ if not result:
372
+ result = {
373
+ "messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
374
+ "finish_reason": ["stop"],
375
+ "id": None,
376
+ "template_parameters": {},
377
+ }
378
+
379
+ time_taken = end_time - start_time
380
+ try:
381
+ response = {
382
+ "samples": [result["messages"][-1]["content"]],
383
+ "finish_reason": ["stop"],
384
+ "id": None,
385
+ }
386
+ except Exception as exc:
387
+ msg = "User provided callback does not conform to chat protocol standard."
388
+ raise EvaluationException(
389
+ message=msg,
390
+ internal_message=msg,
391
+ target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
392
+ category=ErrorCategory.INVALID_VALUE,
393
+ blame=ErrorBlame.USER_ERROR,
394
+ ) from exc
395
+
396
+ return response, chat_protocol_message, time_taken, result
397
+
398
+ async def _to_chat_protocol(self, conversation_history, template_parameters): # pylint: disable=unused-argument
399
+ messages = []
400
+
401
+ for _, m in enumerate(conversation_history):
402
+ if "image:" in m.message:
403
+ content = await self._to_multi_modal_content(m.message)
404
+ messages.append({"content": content, "role": m.role.value})
405
+ else:
406
+ messages.append({"content": m.message, "role": m.role.value})
407
+
408
+ return {
409
+ "template_parameters": template_parameters,
410
+ "messages": messages,
411
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
412
+ }
413
+
414
+ async def _to_multi_modal_content(self, text: str) -> list:
415
+ split_text = re.findall(r"[^{}]+|\{[^{}]*\}", text)
416
+ messages = [
417
+ text.strip("{}").replace("image:", "").strip() if text.startswith("{") else text for text in split_text
418
+ ]
419
+ contents = []
420
+ for msg in messages:
421
+ if msg.startswith("image_understanding/"):
422
+ if isinstance(self.rai_client, RAIClient):
423
+ encoded_image = await self.rai_client.get_image_data(msg)
424
+ else:
425
+ response = self.rai_client.red_teams.get_template_parameters_image(path=msg, stream="true")
426
+ image_data = b"".join(response)
427
+ encoded_image = base64.b64encode(image_data).decode("utf-8")
428
+
429
+ contents.append(
430
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
431
+ )
432
+ else:
433
+ contents.append({"type": "text", "text": msg})
434
+ return contents
435
+
436
+
311
437
  __all__ = [
312
438
  "ConversationRole",
313
439
  "ConversationBot",
314
440
  "CallbackConversationBot",
441
+ "MultiModalConversationBot",
315
442
  "ConversationTurn",
316
443
  ]
@@ -9,9 +9,9 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
9
9
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
10
10
  from azure.ai.evaluation.simulator._constants import SupportedLanguages
11
11
  from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
12
-
13
12
  from ..._http_utils import AsyncHttpPipeline
14
13
  from . import ConversationBot, ConversationTurn
14
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
15
15
 
16
16
 
17
17
  def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
@@ -73,7 +73,7 @@ def is_closing_message_helper(response: str) -> bool:
73
73
  async def simulate_conversation(
74
74
  *,
75
75
  bots: List[ConversationBot],
76
- session: AsyncHttpPipeline,
76
+ session: Union[AsyncHttpPipeline, AIProjectClient],
77
77
  language: SupportedLanguages,
78
78
  stopping_criteria: Callable[[str], bool] = is_closing_message,
79
79
  turn_limit: int = 10,
@@ -102,6 +102,7 @@ async def simulate_conversation(
102
102
  :rtype: Tuple[Optional[str], List[ConversationTurn]]
103
103
  """
104
104
 
105
+ session_state = {}
105
106
  # Read the first prompt.
106
107
  (first_response, request, _, full_response) = await bots[0].generate_response(
107
108
  session=session,
@@ -150,7 +151,10 @@ async def simulate_conversation(
150
151
  conversation_history=conversation_history,
151
152
  max_history=history_limit,
152
153
  turn_number=current_turn,
154
+ session_state=session_state,
153
155
  )
156
+ if "session_state" in full_response and full_response["session_state"] is not None:
157
+ session_state.update(full_response["session_state"])
154
158
 
155
159
  # check if conversation id is null, which means conversation starter was used. use id from next turn
156
160
  if conversation_id is None and "id" in response:
@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
12
12
 
13
13
  # Azure endpoint constants
14
14
  AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
15
- COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"
15
+ COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
16
16
  AZURE_TOKEN_REFRESH_INTERVAL = 600 # seconds
17
17
  AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
18
18
  r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"