azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic; more details are linked from the original diff page.

Files changed (299)
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -1,102 +1,82 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
+ # pylint: disable=C0301,C0114,R0913,R0903
4
5
  # noqa: E501
5
- import functools
6
6
  import logging
7
7
  from random import randint
8
- from typing import Callable, Optional
9
-
10
- from promptflow._sdk._telemetry import ActivityType, monitor_operation
8
+ from typing import Callable, Optional, cast, Union
11
9
 
10
+ from azure.ai.evaluation._constants import TokenScope
11
+ from azure.ai.evaluation._common._experimental import experimental
12
+ from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
12
13
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
13
- from azure.ai.evaluation._model_configurations import AzureAIProject
14
14
  from azure.ai.evaluation.simulator import AdversarialScenario
15
- from azure.identity import DefaultAzureCredential
15
+ from azure.ai.evaluation._model_configurations import AzureAIProject
16
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
17
+ from azure.core.credentials import TokenCredential
16
18
 
17
19
  from ._adversarial_simulator import AdversarialSimulator
18
- from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
20
+ from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
19
21
 
20
22
  logger = logging.getLogger(__name__)
21
23
 
22
24
 
23
- def monitor_adversarial_scenario(func) -> Callable:
24
- """Decorator to monitor adversarial scenario.
25
-
26
- :param func: The function to be decorated.
27
- :type func: Callable
28
- :return: The decorated function.
29
- :rtype: Callable
30
- """
31
-
32
- @functools.wraps(func)
33
- def wrapper(*args, **kwargs):
34
- scenario = str(kwargs.get("scenario", None))
35
- max_conversation_turns = kwargs.get("max_conversation_turns", None)
36
- max_simulation_results = kwargs.get("max_simulation_results", None)
37
- decorated_func = monitor_operation(
38
- activity_name="jailbreak.adversarial.simulator.call",
39
- activity_type=ActivityType.PUBLICAPI,
40
- custom_dimensions={
41
- "scenario": scenario,
42
- "max_conversation_turns": max_conversation_turns,
43
- "max_simulation_results": max_simulation_results,
44
- },
45
- )(func)
46
-
47
- return decorated_func(*args, **kwargs)
48
-
49
- return wrapper
50
-
51
-
25
+ @experimental
52
26
  class DirectAttackSimulator:
53
27
  """
54
28
  Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
55
29
  This simulator converses with your AI system using prompts designed to interrupt normal functionality.
56
30
 
57
- :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
58
- name.
59
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
31
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
32
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
33
+ :type azure_ai_project: Union[str, AzureAIProject]
60
34
  :param credential: The credential for connecting to Azure AI project.
61
35
  :type credential: ~azure.core.credentials.TokenCredential
36
+
37
+ .. admonition:: Example:
38
+
39
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
40
+ :start-after: [START direct_attack_simulator]
41
+ :end-before: [END direct_attack_simulator]
42
+ :language: python
43
+ :dedent: 8
44
+ :caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
62
45
  """
63
46
 
64
- def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
47
+ def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
65
48
  """Constructor."""
66
- # check if azure_ai_project has the keys: subscription_id, resource_group_name, project_name, credential
67
- if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
68
- msg = "azure_ai_project must contain keys: subscription_id, resource_group_name and project_name"
69
- raise EvaluationException(
70
- message=msg,
71
- internal_message=msg,
72
- target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
73
- category=ErrorCategory.MISSING_FIELD,
74
- blame=ErrorBlame.USER_ERROR,
49
+
50
+ if is_onedp_project(azure_ai_project):
51
+ self.azure_ai_project = azure_ai_project
52
+ self.credential = cast(TokenCredential, credential)
53
+ self.token_manager = ManagedIdentityAPITokenManager(
54
+ token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
55
+ logger=logging.getLogger("AdversarialSimulator"),
56
+ credential=self.credential,
75
57
  )
76
- # check the value of the keys in azure_ai_project is not none
77
- if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
78
- msg = "subscription_id, resource_group_name and project_name keys cannot be None"
79
- raise EvaluationException(
80
- message=msg,
81
- internal_message=msg,
82
- target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
83
- category=ErrorCategory.MISSING_FIELD,
84
- blame=ErrorBlame.USER_ERROR,
58
+ self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
59
+ else:
60
+ try:
61
+ self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
62
+ except EvaluationException as e:
63
+ raise EvaluationException(
64
+ message=e.message,
65
+ internal_message=e.internal_message,
66
+ target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
67
+ category=e.category,
68
+ blame=e.blame,
69
+ ) from e
70
+ self.credential = cast(TokenCredential, credential)
71
+ self.token_manager = ManagedIdentityAPITokenManager(
72
+ token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
73
+ logger=logging.getLogger("AdversarialSimulator"),
74
+ credential=self.credential,
85
75
  )
86
- if "credential" not in azure_ai_project and not credential:
87
- credential = DefaultAzureCredential()
88
- elif "credential" in azure_ai_project:
89
- credential = azure_ai_project["credential"]
90
- self.credential = credential
91
- self.azure_ai_project = azure_ai_project
92
- self.token_manager = ManagedIdentityAPITokenManager(
93
- token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
94
- logger=logging.getLogger("AdversarialSimulator"),
95
- credential=credential,
96
- )
97
- self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
76
+ self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
77
+
98
78
  self.adversarial_template_handler = AdversarialTemplateHandler(
99
- azure_ai_project=azure_ai_project, rai_client=self.rai_client
79
+ azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
100
80
  )
101
81
 
102
82
  def _ensure_service_dependencies(self):
@@ -110,7 +90,6 @@ class DirectAttackSimulator:
110
90
  blame=ErrorBlame.USER_ERROR,
111
91
  )
112
92
 
113
- # @monitor_adversarial_scenario
114
93
  async def __call__(
115
94
  self,
116
95
  *,
@@ -169,7 +148,7 @@ class DirectAttackSimulator:
169
148
  - '**$schema**': A string indicating the schema URL for the conversation format.
170
149
 
171
150
  The 'content' for 'assistant' role messages may includes the messages that your callback returned.
172
- :rtype: Dict[str, [List[Dict[str, Any]]]] with two elements
151
+ :rtype: Dict[str, [List[Dict[str, Any]]]]
173
152
 
174
153
  **Output format**
175
154
 
@@ -232,7 +211,7 @@ class DirectAttackSimulator:
232
211
  api_call_retry_sleep_sec=api_call_retry_sleep_sec,
233
212
  api_call_delay_sec=api_call_delay_sec,
234
213
  concurrent_async_task=concurrent_async_task,
235
- randomize_order=True,
214
+ randomize_order=False,
236
215
  randomization_seed=randomization_seed,
237
216
  )
238
217
  jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
@@ -246,7 +225,7 @@ class DirectAttackSimulator:
246
225
  api_call_delay_sec=api_call_delay_sec,
247
226
  concurrent_async_task=concurrent_async_task,
248
227
  _jailbreak_type="upia",
249
- randomize_order=True,
228
+ randomize_order=False,
250
229
  randomization_seed=randomization_seed,
251
230
  )
252
231
  return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
@@ -1,5 +1,4 @@
1
- from ._experimental import experimental
2
1
  from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
3
2
  from ._simulator_data_classes import ConversationHistory, Turn
4
3
 
5
- __all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING", "experimental"]
4
+ __all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
@@ -14,4 +14,5 @@ SUPPORTED_LANGUAGES_MAPPING = {
14
14
  SupportedLanguages.SimplifiedChinese: BASE_SUFFIX.replace("__language__", "simplified chinese"),
15
15
  SupportedLanguages.Portuguese: BASE_SUFFIX.replace("__language__", "portuguese"),
16
16
  SupportedLanguages.Japanese: BASE_SUFFIX.replace("__language__", "japanese"),
17
+ SupportedLanguages.Korean: BASE_SUFFIX.replace("__language__", "korean"),
17
18
  }
@@ -18,7 +18,7 @@ class Turn:
18
18
 
19
19
  role: Union[str, ConversationRole]
20
20
  content: str
21
- context: str = None
21
+ context: Optional[str] = None
22
22
 
23
23
  def to_dict(self) -> Dict[str, Optional[str]]:
24
24
  """
@@ -30,7 +30,19 @@ class Turn:
30
30
  return {
31
31
  "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
32
32
  "content": self.content,
33
- "context": self.context,
33
+ "context": str(self.context),
34
+ }
35
+
36
+ def to_context_free_dict(self) -> Dict[str, Optional[str]]:
37
+ """
38
+ Convert the conversation turn to a dictionary without context.
39
+
40
+ :returns: A dictionary representation of the conversation turn without context.
41
+ :rtype: Dict[str, Optional[str]]
42
+ """
43
+ return {
44
+ "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
45
+ "content": self.content,
34
46
  }
35
47
 
36
48
  def __repr__(self):
@@ -42,13 +54,13 @@ class ConversationHistory:
42
54
  Conversation history class to keep track of the conversation turns in a conversation.
43
55
  """
44
56
 
45
- def __init__(self):
57
+ def __init__(self) -> None:
46
58
  """
47
59
  Initializes the conversation history with an empty list of turns.
48
60
  """
49
61
  self.history: List[Turn] = []
50
62
 
51
- def add_to_history(self, turn: Turn):
63
+ def add_to_history(self, turn: Turn) -> None:
52
64
  """
53
65
  Adds a turn to the conversation history.
54
66
 
@@ -57,7 +69,7 @@ class ConversationHistory:
57
69
  """
58
70
  self.history.append(turn)
59
71
 
60
- def to_list(self) -> List[Dict[str, str]]:
72
+ def to_list(self) -> List[Dict[str, Optional[str]]]:
61
73
  """
62
74
  Converts the conversation history to a list of dictionaries.
63
75
 
@@ -66,6 +78,15 @@ class ConversationHistory:
66
78
  """
67
79
  return [turn.to_dict() for turn in self.history]
68
80
 
81
+ def to_context_free_list(self) -> List[Dict[str, Optional[str]]]:
82
+ """
83
+ Converts the conversation history to a list of dictionaries without context.
84
+
85
+ :returns: A list of dictionaries representing the conversation turns without context.
86
+ :rtype: List[Dict[str, str]]
87
+ """
88
+ return [turn.to_context_free_dict() for turn in self.history]
89
+
69
90
  def __len__(self) -> int:
70
91
  return len(self.history)
71
92
 
@@ -1,100 +1,90 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
+ # pylint: disable=C0301,C0114,R0913,R0903
4
5
  # noqa: E501
5
- import functools
6
+ import asyncio
6
7
  import logging
7
- from typing import Callable
8
+ import random
9
+ from typing import Callable, cast, Union, Optional
8
10
 
9
- from promptflow._sdk._telemetry import ActivityType, monitor_operation
11
+ from tqdm import tqdm
10
12
 
13
+ from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
14
+ from azure.ai.evaluation._common._experimental import experimental
11
15
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
16
+ from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
12
17
  from azure.ai.evaluation._model_configurations import AzureAIProject
13
- from azure.ai.evaluation.simulator import AdversarialScenario
14
- from azure.identity import DefaultAzureCredential
18
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
19
+ from azure.core.credentials import TokenCredential
20
+ from azure.ai.evaluation._constants import TokenScope
15
21
 
16
- from ._adversarial_simulator import AdversarialSimulator
17
- from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
22
+ from ._adversarial_simulator import AdversarialSimulator, JsonLineList
18
23
 
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- def monitor_adversarial_scenario(func) -> Callable:
23
- """Decorator to monitor adversarial scenario.
24
-
25
- :param func: The function to be decorated.
26
- :type func: Callable
27
- :return: The decorated function.
28
- :rtype: Callable
29
- """
24
+ from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
30
25
 
31
- @functools.wraps(func)
32
- def wrapper(*args, **kwargs):
33
- scenario = str(kwargs.get("scenario", None))
34
- max_conversation_turns = kwargs.get("max_conversation_turns", None)
35
- max_simulation_results = kwargs.get("max_simulation_results", None)
36
- decorated_func = monitor_operation(
37
- activity_name="xpia.adversarial.simulator.call",
38
- activity_type=ActivityType.PUBLICAPI,
39
- custom_dimensions={
40
- "scenario": scenario,
41
- "max_conversation_turns": max_conversation_turns,
42
- "max_simulation_results": max_simulation_results,
43
- },
44
- )(func)
45
-
46
- return decorated_func(*args, **kwargs)
47
-
48
- return wrapper
26
+ logger = logging.getLogger(__name__)
49
27
 
50
28
 
51
- class IndirectAttackSimulator:
29
+ @experimental
30
+ class IndirectAttackSimulator(AdversarialSimulator):
52
31
  """
53
32
  Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
54
33
 
55
- :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
56
- name.
57
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
34
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
35
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
36
+ :type azure_ai_project: Union[str, AzureAIProject]
58
37
  :param credential: The credential for connecting to Azure AI project.
59
38
  :type credential: ~azure.core.credentials.TokenCredential
39
+
40
+ .. admonition:: Example:
41
+
42
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
43
+ :start-after: [START indirect_attack_simulator]
44
+ :end-before: [END indirect_attack_simulator]
45
+ :language: python
46
+ :dedent: 8
47
+ :caption: Run the IndirectAttackSimulator to produce 1 result with 1 conversation turn (2 messages in the result).
60
48
  """
61
49
 
62
- def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
50
+ def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
63
51
  """Constructor."""
64
- # check if azure_ai_project has the keys: subscription_id, resource_group_name, project_name, credential
65
- if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
66
- msg = "azure_ai_project must contain keys: subscription_id, resource_group_name and project_name"
67
- raise EvaluationException(
68
- message=msg,
69
- internal_message=msg,
70
- target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
71
- category=ErrorCategory.MISSING_FIELD,
72
- blame=ErrorBlame.USER_ERROR,
52
+
53
+ if is_onedp_project(azure_ai_project):
54
+ self.azure_ai_project = azure_ai_project
55
+ self.credential = cast(TokenCredential, credential)
56
+ self.token_manager = ManagedIdentityAPITokenManager(
57
+ token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
58
+ logger=logging.getLogger("AdversarialSimulator"),
59
+ credential=self.credential,
73
60
  )
74
- if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
75
- msg = "subscription_id, resource_group_name and project_name keys cannot be None"
76
- raise EvaluationException(
77
- message=msg,
78
- internal_message=msg,
79
- target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
80
- category=ErrorCategory.MISSING_FIELD,
81
- blame=ErrorBlame.USER_ERROR,
61
+ self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
62
+ self.adversarial_template_handler = AdversarialTemplateHandler(
63
+ azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
82
64
  )
83
- if "credential" not in azure_ai_project and not credential:
84
- credential = DefaultAzureCredential()
85
- elif "credential" in azure_ai_project:
86
- credential = azure_ai_project["credential"]
87
- self.credential = credential
88
- self.azure_ai_project = azure_ai_project
89
- self.token_manager = ManagedIdentityAPITokenManager(
90
- token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
91
- logger=logging.getLogger("AdversarialSimulator"),
92
- credential=credential,
93
- )
94
- self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
95
- self.adversarial_template_handler = AdversarialTemplateHandler(
96
- azure_ai_project=azure_ai_project, rai_client=self.rai_client
97
- )
65
+ else:
66
+ try:
67
+ self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
68
+ except EvaluationException as e:
69
+ raise EvaluationException(
70
+ message=e.message,
71
+ internal_message=e.internal_message,
72
+ target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
73
+ category=e.category,
74
+ blame=e.blame,
75
+ ) from e
76
+
77
+ self.credential = cast(TokenCredential, credential)
78
+ self.token_manager = ManagedIdentityAPITokenManager(
79
+ token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
80
+ logger=logging.getLogger("AdversarialSimulator"),
81
+ credential=self.credential,
82
+ )
83
+ self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
84
+ self.adversarial_template_handler = AdversarialTemplateHandler(
85
+ azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
86
+ )
87
+ super().__init__(azure_ai_project=azure_ai_project, credential=credential)
98
88
 
99
89
  def _ensure_service_dependencies(self):
100
90
  if self.rai_client is None:
@@ -107,33 +97,26 @@ class IndirectAttackSimulator:
107
97
  blame=ErrorBlame.USER_ERROR,
108
98
  )
109
99
 
110
- # @monitor_adversarial_scenario
111
100
  async def __call__(
112
101
  self,
113
102
  *,
114
- scenario: AdversarialScenario,
115
103
  target: Callable,
116
- max_conversation_turns: int = 1,
117
104
  max_simulation_results: int = 3,
118
105
  api_call_retry_limit: int = 3,
119
106
  api_call_retry_sleep_sec: int = 1,
120
107
  api_call_delay_sec: int = 0,
121
108
  concurrent_async_task: int = 3,
109
+ randomization_seed: Optional[int] = None,
110
+ **kwargs,
122
111
  ):
123
112
  """
124
113
  Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
125
114
  This simulator converses with your AI system using prompts injected into the context to interrupt normal
126
115
  expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside
127
116
  the scope of your AI system.
128
-
129
- :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
130
- :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
131
117
  :keyword target: The target function to simulate adversarial inputs against.
132
118
  This function should be asynchronous and accept a dictionary representing the adversarial input.
133
119
  :paramtype target: Callable
134
- :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
135
- Defaults to 1.
136
- :paramtype max_conversation_turns: int
137
120
  :keyword max_simulation_results: The maximum number of simulation results to return.
138
121
  Defaults to 3.
139
122
  :paramtype max_simulation_results: int
@@ -149,6 +132,9 @@ class IndirectAttackSimulator:
149
132
  :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
150
133
  Defaults to 3.
151
134
  :paramtype concurrent_async_task: int
135
+ :keyword randomization_seed: The seed used to randomize prompt selection. If unset, the system's
136
+ default seed is used. Defaults to None.
137
+ :paramtype randomization_seed: Optional[int]
152
138
  :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
153
139
 
154
140
  - 'template_parameters': A dictionary with parameters used in the conversation template,
@@ -170,11 +156,11 @@ class IndirectAttackSimulator:
170
156
  'template_parameters': {},
171
157
  'messages': [
172
158
  {
173
- 'content': '<jailbreak prompt> <adversarial query>',
159
+ 'content': '<adversarial query>',
174
160
  'role': 'user'
175
161
  },
176
162
  {
177
- 'content': "<response from endpoint>",
163
+ 'content': "<response from your callback>",
178
164
  'role': 'assistant',
179
165
  'context': None
180
166
  }
@@ -183,25 +169,80 @@ class IndirectAttackSimulator:
183
169
  }]
184
170
  }
185
171
  """
186
- if scenario not in AdversarialScenario.__members__.values():
187
- msg = f"Invalid scenario: {scenario}. Supported scenarios: {AdversarialScenario.__members__.values()}"
188
- raise EvaluationException(
189
- message=msg,
190
- internal_message=msg,
191
- target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
192
- category=ErrorCategory.INVALID_VALUE,
193
- blame=ErrorBlame.USER_ERROR,
172
+ # values that cannot be changed:
173
+ scenario = AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK
174
+ max_conversation_turns = 2
175
+ language = SupportedLanguages.English
176
+ self._ensure_service_dependencies()
177
+ templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
178
+ concurrent_async_task = min(concurrent_async_task, 1000)
179
+ semaphore = asyncio.Semaphore(concurrent_async_task)
180
+ sim_results = []
181
+ tasks = []
182
+ total_tasks = sum(len(t.template_parameters) for t in templates)
183
+ if max_simulation_results > total_tasks:
184
+ logger.warning(
185
+ "Cannot provide %s results due to maximum number of adversarial simulations that can be generated: %s."
186
+ "\n %s simulations will be generated.",
187
+ max_simulation_results,
188
+ total_tasks,
189
+ total_tasks,
194
190
  )
195
- jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
196
- jb_sim_results = await jb_sim(
197
- scenario=scenario,
198
- target=target,
199
- max_conversation_turns=max_conversation_turns,
200
- max_simulation_results=max_simulation_results,
201
- api_call_retry_limit=api_call_retry_limit,
202
- api_call_retry_sleep_sec=api_call_retry_sleep_sec,
203
- api_call_delay_sec=api_call_delay_sec,
204
- concurrent_async_task=concurrent_async_task,
205
- _jailbreak_type="xpia",
191
+ total_tasks = min(total_tasks, max_simulation_results)
192
+ progress_bar = tqdm(
193
+ total=total_tasks,
194
+ desc="generating jailbreak simulations",
195
+ ncols=100,
196
+ unit="simulations",
206
197
  )
207
- return jb_sim_results
198
+
199
+ # Apply randomization to templates if seed is provided
200
+ if randomization_seed is not None:
201
+ # Create a local random instance to avoid polluting global state
202
+ local_random = random.Random(randomization_seed)
203
+ local_random.shuffle(templates)
204
+
205
+ for template in templates:
206
+ for parameter in template.template_parameters:
207
+ tasks.append(
208
+ asyncio.create_task(
209
+ self._simulate_async(
210
+ target=target,
211
+ template=template,
212
+ parameters=parameter,
213
+ max_conversation_turns=max_conversation_turns,
214
+ api_call_retry_limit=api_call_retry_limit,
215
+ api_call_retry_sleep_sec=api_call_retry_sleep_sec,
216
+ api_call_delay_sec=api_call_delay_sec,
217
+ language=language,
218
+ semaphore=semaphore,
219
+ scenario=scenario,
220
+ )
221
+ )
222
+ )
223
+ if len(tasks) >= max_simulation_results:
224
+ break
225
+ if len(tasks) >= max_simulation_results:
226
+ break
227
+ for task in asyncio.as_completed(tasks):
228
+ completed_task = await task # type: ignore
229
+ template_parameters = completed_task.get("template_parameters", {}) # type: ignore
230
+ xpia_attack_type = template_parameters.get("xpia_attack_type", "") # type: ignore
231
+ action = template_parameters.get("action", "") # type: ignore
232
+ document_type = template_parameters.get("document_type", "") # type: ignore
233
+ sim_results.append(
234
+ {
235
+ "messages": completed_task["messages"], # type: ignore
236
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
237
+ "template_parameters": {
238
+ "metadata": {
239
+ "xpia_attack_type": xpia_attack_type,
240
+ "action": action,
241
+ "document_type": document_type,
242
+ },
243
+ },
244
+ }
245
+ )
246
+ progress_bar.update(1)
247
+ progress_bar.close()
248
+ return JsonLineList(sim_results)
@@ -4,11 +4,12 @@
4
4
 
5
5
  """Tooling for model evaluation"""
6
6
 
7
- from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager, TokenScope
7
+ from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager
8
8
  from ._proxy_completion_model import ProxyChatCompletionsModel
9
9
  from ._rai_client import RAIClient
10
10
  from ._template_handler import CONTENT_HARM_TEMPLATES_COLLECTION_KEY, AdversarialTemplateHandler
11
11
  from .models import LLMBase, OpenAIChatCompletionsModel
12
+ from ..._constants import TokenScope
12
13
 
13
14
  __all__ = [
14
15
  "ManagedIdentityAPITokenManager",