azure-ai-evaluation: azure_ai_evaluation-1.0.0b2-py3-none-any.whl → azure_ai_evaluation-1.13.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299)
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -6,79 +6,108 @@
6
6
  import asyncio
7
7
  import logging
8
8
  import random
9
- from typing import Any, Callable, Dict, List, Optional
9
+ from typing import Any, Callable, Dict, List, Optional, Union, cast
10
+ import uuid
11
+ import warnings
10
12
 
11
13
  from tqdm import tqdm
12
14
 
15
+ from azure.ai.evaluation._common._experimental import experimental
16
+ from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
17
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
13
18
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
14
19
  from azure.ai.evaluation._http_utils import get_async_http_client
15
20
  from azure.ai.evaluation._model_configurations import AzureAIProject
16
- from azure.ai.evaluation.simulator import AdversarialScenario
21
+ from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
17
22
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
23
+ from azure.ai.evaluation._constants import TokenScope
24
+ from azure.core.credentials import TokenCredential
18
25
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
19
- from azure.identity import DefaultAzureCredential
20
26
 
21
27
  from ._constants import SupportedLanguages
22
- from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
28
+ from ._conversation import (
29
+ CallbackConversationBot,
30
+ MultiModalConversationBot,
31
+ ConversationBot,
32
+ ConversationRole,
33
+ ConversationTurn,
34
+ )
23
35
  from ._conversation._conversation import simulate_conversation
24
36
  from ._model_tools import (
25
37
  AdversarialTemplateHandler,
26
38
  ManagedIdentityAPITokenManager,
27
39
  ProxyChatCompletionsModel,
28
40
  RAIClient,
29
- TokenScope,
30
41
  )
42
+ from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
31
43
  from ._utils import JsonLineList
32
44
 
33
45
  logger = logging.getLogger(__name__)
34
46
 
35
47
 
48
+ @experimental
36
49
  class AdversarialSimulator:
37
50
  """
38
51
  Initializes the adversarial simulator with a project scope.
39
52
 
40
- :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
41
- name.
42
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
53
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
54
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
55
+ :type azure_ai_project: Union[str, AzureAIProject]
43
56
  :param credential: The credential for connecting to Azure AI project.
44
57
  :type credential: ~azure.core.credentials.TokenCredential
58
+
59
+ .. admonition:: Example:
60
+
61
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
62
+ :start-after: [START adversarial_scenario]
63
+ :end-before: [END adversarial_scenario]
64
+ :language: python
65
+ :dedent: 8
66
+ :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
67
+ 2 conversation turns each (4 messages per result).
45
68
  """
46
69
 
47
- def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
70
+ def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
48
71
  """Constructor."""
49
- # check if azure_ai_project has the keys: subscription_id, resource_group_name and project_name
50
- if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
51
- msg = "azure_ai_project must contain keys: subscription_id, resource_group_name, project_name"
52
- raise EvaluationException(
53
- message=msg,
54
- internal_message=msg,
55
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
56
- category=ErrorCategory.MISSING_FIELD,
57
- blame=ErrorBlame.USER_ERROR,
72
+ warnings.warn(
73
+ "DEPRECATION NOTE: Azure AI Evaluation SDK has discontinued active development on the AdversarialSimulator class."
74
+ + " While existing functionality remains available in preview, it is no longer recommended for production workloads or future integration. "
75
+ + "We recommend users migrate to the AI Red Teaming Agent for future use as it supports full parity of functionality."
76
+ + " See https://aka.ms/airedteamingagent-sample for details on AI Red Teaming Agent.",
77
+ DeprecationWarning,
78
+ stacklevel=2,
79
+ )
80
+
81
+ if is_onedp_project(azure_ai_project):
82
+ self.azure_ai_project = azure_ai_project
83
+ self.credential = cast(TokenCredential, credential)
84
+ self.token_manager = ManagedIdentityAPITokenManager(
85
+ token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
86
+ logger=logging.getLogger("AdversarialSimulator"),
87
+ credential=self.credential,
58
88
  )
59
- # check the value of the keys in azure_ai_project is not none
60
- if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
61
- msg = "subscription_id, resource_group_name and project_name cannot be None"
62
- raise EvaluationException(
63
- message=msg,
64
- internal_message=msg,
65
- target=ErrorTarget.ADVERSARIAL_SIMULATOR,
66
- category=ErrorCategory.MISSING_FIELD,
67
- blame=ErrorBlame.USER_ERROR,
89
+ self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
90
+ else:
91
+ try:
92
+ self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
93
+ except EvaluationException as e:
94
+ raise EvaluationException(
95
+ message=e.message,
96
+ internal_message=e.internal_message,
97
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
98
+ category=e.category,
99
+ blame=e.blame,
100
+ ) from e
101
+ self.credential = cast(TokenCredential, credential)
102
+ self.token_manager = ManagedIdentityAPITokenManager(
103
+ token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
104
+ logger=logging.getLogger("AdversarialSimulator"),
105
+ credential=self.credential,
68
106
  )
69
- if "credential" not in azure_ai_project and not credential:
70
- credential = DefaultAzureCredential()
71
- elif "credential" in azure_ai_project:
72
- credential = azure_ai_project["credential"]
73
- self.azure_ai_project = azure_ai_project
74
- self.token_manager = ManagedIdentityAPITokenManager(
75
- token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
76
- logger=logging.getLogger("AdversarialSimulator"),
77
- credential=credential,
78
- )
79
- self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
107
+ self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
108
+
80
109
  self.adversarial_template_handler = AdversarialTemplateHandler(
81
- azure_ai_project=azure_ai_project, rai_client=self.rai_client
110
+ azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
82
111
  )
83
112
 
84
113
  def _ensure_service_dependencies(self):
@@ -92,7 +121,7 @@ class AdversarialSimulator:
92
121
  blame=ErrorBlame.USER_ERROR,
93
122
  )
94
123
 
95
- # @monitor_adversarial_scenario
124
+ # pylint: disable=too-many-locals
96
125
  async def __call__(
97
126
  self,
98
127
  *,
@@ -106,10 +135,10 @@ class AdversarialSimulator:
106
135
  api_call_retry_sleep_sec: int = 1,
107
136
  api_call_delay_sec: int = 0,
108
137
  concurrent_async_task: int = 3,
109
- _jailbreak_type: Optional[str] = None,
110
138
  language: SupportedLanguages = SupportedLanguages.English,
111
139
  randomize_order: bool = True,
112
140
  randomization_seed: Optional[int] = None,
141
+ **kwargs,
113
142
  ):
114
143
  """
115
144
  Executes the adversarial simulation against a specified target function asynchronously.
@@ -159,28 +188,6 @@ class AdversarialSimulator:
159
188
 
160
189
  The 'content' for 'assistant' role messages may includes the messages that your callback returned.
161
190
  :rtype: List[Dict[str, Any]]
162
-
163
- **Output format**
164
-
165
- .. code-block:: python
166
-
167
- return_value = [
168
- {
169
- 'template_parameters': {},
170
- 'messages': [
171
- {
172
- 'content': '<jailbreak prompt> <adversarial query>',
173
- 'role': 'user'
174
- },
175
- {
176
- 'content': "<response from endpoint>",
177
- 'role': 'assistant',
178
- 'context': None
179
- }
180
- ],
181
- '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
182
- }
183
- ]
184
191
  """
185
192
 
186
193
  # validate the inputs
@@ -202,6 +209,14 @@ class AdversarialSimulator:
202
209
  )
203
210
  self._ensure_service_dependencies()
204
211
  templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
212
+ if len(templates) == 0:
213
+ raise EvaluationException(
214
+ message="Templates not found. Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
215
+ internal_message="Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
216
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
217
+ )
218
+ simulation_id = str(uuid.uuid4())
219
+ logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
205
220
  concurrent_async_task = min(concurrent_async_task, 1000)
206
221
  semaphore = asyncio.Semaphore(concurrent_async_task)
207
222
  sim_results = []
@@ -216,46 +231,85 @@ class AdversarialSimulator:
216
231
  total_tasks,
217
232
  )
218
233
  total_tasks = min(total_tasks, max_simulation_results)
234
+ _jailbreak_type = kwargs.get("_jailbreak_type", None)
219
235
  if _jailbreak_type:
220
- jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
236
+ if isinstance(self.rai_client, RAIClient):
237
+ jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
238
+ elif isinstance(self.rai_client, AIProjectClient):
239
+ jailbreak_dataset = self.rai_client.red_teams.get_jail_break_dataset_with_type(type=_jailbreak_type)
221
240
  progress_bar = tqdm(
222
241
  total=total_tasks,
223
242
  desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
224
243
  ncols=100,
225
244
  unit="simulations",
226
245
  )
227
- for template in templates:
228
- parameter_order = list(range(len(template.template_parameters)))
229
- if randomize_order:
230
- # The template parameter lists are persistent across sim runs within a session,
231
- # So randomize the selection instead of the parameter list directly,
232
- # or a potentially large deep copy.
233
- if randomization_seed is not None:
234
- random.seed(randomization_seed)
235
- random.shuffle(parameter_order)
236
- for index in parameter_order:
237
- parameter = template.template_parameters[index].copy()
238
- if _jailbreak_type == "upia":
239
- parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
240
- tasks.append(
241
- asyncio.create_task(
242
- self._simulate_async(
243
- target=target,
244
- template=template,
245
- parameters=parameter,
246
- max_conversation_turns=max_conversation_turns,
247
- api_call_retry_limit=api_call_retry_limit,
248
- api_call_retry_sleep_sec=api_call_retry_sleep_sec,
249
- api_call_delay_sec=api_call_delay_sec,
250
- language=language,
251
- semaphore=semaphore,
252
- )
246
+ if randomize_order:
247
+ # The template parameter lists are persistent across sim runs within a session,
248
+ # So randomize the selection instead of the parameter list directly,
249
+ # or a potentially large deep copy.
250
+ if randomization_seed is not None:
251
+ # Create a local random instance to avoid polluting global state
252
+ local_random = random.Random(randomization_seed)
253
+ local_random.shuffle(templates)
254
+ else:
255
+ random.shuffle(templates)
256
+
257
+ # Prepare task parameters based on scenario - but use a single append call for all scenarios
258
+ tasks = []
259
+ template_parameter_pairs = []
260
+
261
+ if scenario == AdversarialScenario.ADVERSARIAL_CONVERSATION:
262
+ # For ADVERSARIAL_CONVERSATION, flatten the parameters
263
+ for i, template in enumerate(templates):
264
+ if not template.template_parameters:
265
+ continue
266
+ for parameter in template.template_parameters:
267
+ template_parameter_pairs.append((template, parameter))
268
+ else:
269
+ # Use original logic for other scenarios - zip parameters
270
+ parameter_lists = [t.template_parameters for t in templates]
271
+ zipped_parameters = list(zip(*parameter_lists))
272
+
273
+ for param_group in zipped_parameters:
274
+ for template, parameter in zip(templates, param_group):
275
+ template_parameter_pairs.append((template, parameter))
276
+
277
+ # Limit to max_simulation_results if needed
278
+ if len(template_parameter_pairs) > max_simulation_results:
279
+ template_parameter_pairs = template_parameter_pairs[
280
+ :max_simulation_results
281
+ ] # Create a seeded random instance for jailbreak selection if randomization_seed is provided
282
+ jailbreak_random = None
283
+ if _jailbreak_type == "upia" and randomization_seed is not None:
284
+ jailbreak_random = random.Random(randomization_seed)
285
+
286
+ # Single task append loop for all scenarios
287
+ for template, parameter in template_parameter_pairs:
288
+ if _jailbreak_type == "upia":
289
+ if jailbreak_random is not None:
290
+ selected_jailbreak = jailbreak_random.choice(jailbreak_dataset)
291
+ else:
292
+ selected_jailbreak = random.choice(jailbreak_dataset)
293
+ parameter = self._add_jailbreak_parameter(parameter, selected_jailbreak)
294
+
295
+ tasks.append(
296
+ asyncio.create_task(
297
+ self._simulate_async(
298
+ target=target,
299
+ template=template,
300
+ parameters=parameter,
301
+ max_conversation_turns=max_conversation_turns,
302
+ api_call_retry_limit=api_call_retry_limit,
303
+ api_call_retry_sleep_sec=api_call_retry_sleep_sec,
304
+ api_call_delay_sec=api_call_delay_sec,
305
+ language=language,
306
+ semaphore=semaphore,
307
+ scenario=scenario,
308
+ simulation_id=simulation_id,
253
309
  )
254
310
  )
255
- if len(tasks) >= max_simulation_results:
256
- break
257
- if len(tasks) >= max_simulation_results:
258
- break
311
+ )
312
+
259
313
  for task in asyncio.as_completed(tasks):
260
314
  sim_results.append(await task)
261
315
  progress_bar.update(1)
@@ -263,16 +317,21 @@ class AdversarialSimulator:
263
317
 
264
318
  return JsonLineList(sim_results)
265
319
 
266
- def _to_chat_protocol(self, *, conversation_history, template_parameters: Dict = None):
320
+ def _to_chat_protocol(
321
+ self,
322
+ *,
323
+ conversation_history: List[ConversationTurn],
324
+ template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
325
+ ):
267
326
  if template_parameters is None:
268
327
  template_parameters = {}
269
328
  messages = []
270
329
  for _, m in enumerate(conversation_history):
271
330
  message = {"content": m.message, "role": m.role.value}
272
- if "context" in m.full_response:
331
+ if m.full_response is not None and "context" in m.full_response:
273
332
  message["context"] = m.full_response["context"]
274
333
  messages.append(message)
275
- conversation_category = template_parameters.pop("metadata", {}).get("Category")
334
+ conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
276
335
  template_parameters["metadata"] = {}
277
336
  for key in (
278
337
  "conversation_starter",
@@ -280,6 +339,9 @@ class AdversarialSimulator:
280
339
  "target_population",
281
340
  "topic",
282
341
  "ch_template_placeholder",
342
+ "chatbot_name",
343
+ "name",
344
+ "group",
283
345
  ):
284
346
  template_parameters.pop(key, None)
285
347
  if conversation_category:
@@ -294,54 +356,92 @@ class AdversarialSimulator:
294
356
  self,
295
357
  *,
296
358
  target: Callable,
297
- template,
298
- parameters,
299
- max_conversation_turns,
300
- api_call_retry_limit,
301
- api_call_retry_sleep_sec,
302
- api_call_delay_sec,
303
- language,
304
- semaphore,
359
+ template: AdversarialTemplate,
360
+ parameters: TemplateParameters,
361
+ max_conversation_turns: int,
362
+ api_call_retry_limit: int,
363
+ api_call_retry_sleep_sec: int,
364
+ api_call_delay_sec: int,
365
+ language: SupportedLanguages,
366
+ semaphore: asyncio.Semaphore,
367
+ scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
368
+ simulation_id: str = "",
305
369
  ) -> List[Dict]:
306
- user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
370
+ user_bot = self._setup_bot(
371
+ role=ConversationRole.USER,
372
+ template=template,
373
+ parameters=parameters,
374
+ scenario=scenario,
375
+ simulation_id=simulation_id,
376
+ )
307
377
  system_bot = self._setup_bot(
308
- target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
378
+ target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
309
379
  )
310
380
  bots = [user_bot, system_bot]
311
- session = get_async_http_client().with_policies(
312
- retry_policy=AsyncRetryPolicy(
313
- retry_total=api_call_retry_limit,
314
- retry_backoff_factor=api_call_retry_sleep_sec,
315
- retry_mode=RetryMode.Fixed,
316
- )
317
- )
318
381
 
319
- async with semaphore, session:
320
- _, conversation_history = await simulate_conversation(
321
- bots=bots,
322
- session=session,
323
- turn_limit=max_conversation_turns,
324
- api_call_delay_sec=api_call_delay_sec,
325
- language=language,
382
+ async def run_simulation(session_obj):
383
+ async with semaphore:
384
+ _, conversation_history = await simulate_conversation(
385
+ bots=bots,
386
+ session=session_obj,
387
+ turn_limit=max_conversation_turns,
388
+ api_call_delay_sec=api_call_delay_sec,
389
+ language=language,
390
+ )
391
+ return conversation_history
392
+
393
+ if isinstance(self.rai_client, AIProjectClient):
394
+ session = self.rai_client
395
+ else:
396
+ session = get_async_http_client().with_policies(
397
+ retry_policy=AsyncRetryPolicy(
398
+ retry_total=api_call_retry_limit,
399
+ retry_backoff_factor=api_call_retry_sleep_sec,
400
+ retry_mode=RetryMode.Fixed,
401
+ )
326
402
  )
327
- return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)
403
+ conversation_history = await run_simulation(session)
404
+
405
+ return self._to_chat_protocol(
406
+ conversation_history=conversation_history,
407
+ template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
408
+ )
328
409
 
329
- def _get_user_proxy_completion_model(self, template_key, template_parameters):
410
+ def _get_user_proxy_completion_model(
411
+ self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
412
+ ) -> ProxyChatCompletionsModel:
413
+ endpoint_url = (
414
+ self.rai_client._config.endpoint + "/redTeams/simulation/chat/completions/submit"
415
+ if isinstance(self.rai_client, AIProjectClient)
416
+ else self.rai_client.simulation_submit_endpoint
417
+ )
330
418
  return ProxyChatCompletionsModel(
331
419
  name="raisvc_proxy_model",
332
420
  template_key=template_key,
333
421
  template_parameters=template_parameters,
334
- endpoint_url=self.rai_client.simulation_submit_endpoint,
422
+ endpoint_url=endpoint_url,
335
423
  token_manager=self.token_manager,
336
424
  api_version="2023-07-01-preview",
337
425
  max_tokens=1200,
338
426
  temperature=0.0,
427
+ simulation_id=simulation_id,
339
428
  )
340
429
 
341
- def _setup_bot(self, *, role, template, parameters, target: Callable = None):
342
- if role == ConversationRole.USER:
430
+ def _setup_bot(
431
+ self,
432
+ *,
433
+ role: ConversationRole,
434
+ template: AdversarialTemplate,
435
+ parameters: TemplateParameters,
436
+ target: Optional[Callable] = None,
437
+ scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
438
+ simulation_id: str = "",
439
+ ) -> ConversationBot:
440
+ if role is ConversationRole.USER:
343
441
  model = self._get_user_proxy_completion_model(
344
- template_key=template.template_name, template_parameters=parameters
442
+ template_key=template.template_name,
443
+ template_parameters=parameters,
444
+ simulation_id=simulation_id,
345
445
  )
346
446
  return ConversationBot(
347
447
  role=role,
@@ -350,35 +450,61 @@ class AdversarialSimulator:
350
450
  instantiation_parameters=parameters,
351
451
  )
352
452
 
353
- if role == ConversationRole.ASSISTANT:
453
+ if role is ConversationRole.ASSISTANT:
454
+ if target is None:
455
+ msg = "Cannot setup system bot. Target is None"
354
456
 
355
- def dummy_model() -> None:
356
- return None
457
+ raise EvaluationException(
458
+ message=msg,
459
+ internal_message=msg,
460
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
461
+ error_category=ErrorCategory.INVALID_VALUE,
462
+ blame=ErrorBlame.SYSTEM_ERROR,
463
+ )
464
+
465
+ class DummyModel:
466
+ def __init__(self):
467
+ self.name = "dummy_model"
468
+
469
+ def __call__(self) -> None:
470
+ pass
471
+
472
+ if scenario in [
473
+ _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_GEN,
474
+ _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_MULTIMODAL,
475
+ ]:
476
+ return MultiModalConversationBot(
477
+ callback=target,
478
+ role=role,
479
+ model=DummyModel(),
480
+ user_template=str(template),
481
+ user_template_parameters=parameters,
482
+ rai_client=self.rai_client,
483
+ conversation_template="",
484
+ instantiation_parameters={},
485
+ )
357
486
 
358
- dummy_model.name = "dummy_model"
359
487
  return CallbackConversationBot(
360
488
  callback=target,
361
489
  role=role,
362
- model=dummy_model,
490
+ model=DummyModel(),
363
491
  user_template=str(template),
364
492
  user_template_parameters=parameters,
365
493
  conversation_template="",
366
494
  instantiation_parameters={},
367
495
  )
368
- return ConversationBot(
369
- role=role,
370
- model=model,
371
- conversation_template=template,
372
- instantiation_parameters=parameters,
373
- )
374
496
 
375
- def _join_conversation_starter(self, parameters, to_join):
376
- key = "conversation_starter"
377
- if key in parameters.keys():
378
- parameters[key] = f"{to_join} {parameters[key]}"
379
- else:
380
- parameters[key] = to_join
497
+ msg = "Invalid value for enum ConversationRole. This should never happen."
498
+ raise EvaluationException(
499
+ message=msg,
500
+ internal_message=msg,
501
+ target=ErrorTarget.ADVERSARIAL_SIMULATOR,
502
+ category=ErrorCategory.INVALID_VALUE,
503
+ blame=ErrorBlame.SYSTEM_ERROR,
504
+ )
381
505
 
506
+ def _add_jailbreak_parameter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
507
+ parameters["jailbreak_string"] = to_join
382
508
  return parameters
383
509
 
384
510
  def call_sync(
@@ -5,7 +5,17 @@ from enum import Enum
5
5
 
6
6
 
7
7
  class SupportedLanguages(Enum):
8
- """Supported languages for evaluation, using ISO standard language codes."""
8
+ """Supported languages for evaluation, using ISO standard language codes.
9
+
10
+ .. admonition:: Example:
11
+
12
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
13
+ :start-after: [START supported_languages]
14
+ :end-before: [END supported_languages]
15
+ :language: python
16
+ :dedent: 8
17
+ :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
18
+ """
9
19
 
10
20
  Spanish = "es"
11
21
  Italian = "it"
@@ -15,3 +25,4 @@ class SupportedLanguages(Enum):
15
25
  Portuguese = "pt"
16
26
  Japanese = "ja"
17
27
  English = "en"
28
+ Korean = "ko"