azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. (The original page links to more details; the link target is not preserved in this text.)

Files changed (299)
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -2,13 +2,15 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
  import os
5
- from typing import Any
5
+ from typing import Any, Dict, List
6
6
  from urllib.parse import urljoin, urlparse
7
+ import base64
8
+ import json
7
9
 
8
10
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
9
11
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
10
12
  from azure.ai.evaluation._model_configurations import AzureAIProject
11
- from azure.ai.evaluation._user_agent import USER_AGENT
13
+ from azure.ai.evaluation._user_agent import UserAgentSingleton
12
14
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
13
15
 
14
16
  from ._identity_manager import APITokenManager
@@ -57,9 +59,11 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
57
59
  # add a "/" at the end of the url
58
60
  self.api_url = self.api_url.rstrip("/") + "/"
59
61
  self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
62
+ self.parameter_image_endpoint = urljoin(self.api_url, "simulation/template/parameters/image")
60
63
  self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
61
64
  self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
62
65
  self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")
66
+ self.attack_objectives_endpoint = urljoin(self.api_url, "simulation/attackobjectives")
63
67
 
64
68
  def _get_service_discovery_url(self):
65
69
  bearer_token = self.token_manager.get_token()
@@ -74,14 +78,18 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
74
78
  timeout=5,
75
79
  )
76
80
  if response.status_code != 200:
77
- msg = "Failed to retrieve the discovery service URL."
81
+ msg = (
82
+ f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
83
+ f"correctly, and make sure you have the necessary access permissions. "
84
+ f"Status code: {response.status_code}."
85
+ )
78
86
  raise EvaluationException(
79
87
  message=msg,
80
- internal_message=msg,
81
88
  target=ErrorTarget.RAI_CLIENT,
82
- category=ErrorCategory.SERVICE_UNAVAILABLE,
83
- blame=ErrorBlame.UNKNOWN,
89
+ category=ErrorCategory.PROJECT_ACCESS_ERROR,
90
+ blame=ErrorBlame.USER_ERROR,
84
91
  )
92
+
85
93
  base_url = urlparse(response.json()["properties"]["discoveryUrl"])
86
94
  return f"{base_url.scheme}://{base_url.netloc}"
87
95
 
@@ -140,7 +148,7 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
140
148
  headers = {
141
149
  "Authorization": f"Bearer {token}",
142
150
  "Content-Type": "application/json",
143
- "User-Agent": USER_AGENT,
151
+ "User-Agent": UserAgentSingleton().value,
144
152
  }
145
153
 
146
154
  session = self._create_async_client()
@@ -162,3 +170,97 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
162
170
  category=ErrorCategory.UNKNOWN,
163
171
  blame=ErrorBlame.USER_ERROR,
164
172
  )
173
+
174
+ async def get_image_data(self, path: str) -> Any:
175
+ """Make a GET Image request to the given url
176
+
177
+ :param path: The url of the image
178
+ :type path: str
179
+ :raises EvaluationException: If the Azure safety evaluation service is not available in the current region
180
+ :return: The response
181
+ :rtype: Any
182
+ """
183
+ token = self.token_manager.get_token()
184
+ headers = {
185
+ "Authorization": f"Bearer {token}",
186
+ "Content-Type": "application/json",
187
+ "User-Agent": UserAgentSingleton().value,
188
+ }
189
+
190
+ session = self._create_async_client()
191
+ params = {"path": path}
192
+ async with session:
193
+ response = await session.get(
194
+ url=self.parameter_image_endpoint, params=params, headers=headers
195
+ ) # pylint: disable=unexpected-keyword-arg
196
+
197
+ if response.status_code == 200:
198
+ return base64.b64encode(response.content).decode("utf-8")
199
+
200
+ msg = (
201
+ "Azure safety evaluation service is not available in your current region, "
202
+ + "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
203
+ )
204
+ raise EvaluationException(
205
+ message=msg,
206
+ internal_message=msg,
207
+ target=ErrorTarget.RAI_CLIENT,
208
+ category=ErrorCategory.UNKNOWN,
209
+ blame=ErrorBlame.USER_ERROR,
210
+ )
211
+
212
+ async def get_attack_objectives(
213
+ self, risk_categories: List[str], application_scenario: str = None, strategy: str = None
214
+ ) -> Any:
215
+ """Get the attack objectives based on risk categories and application scenario
216
+
217
+ :param risk_categories: List of risk categories to generate attack objectives for
218
+ :type risk_categories: List[str]
219
+ :param application_scenario: Optional description of the application scenario for context
220
+ :type application_scenario: str
221
+ :param strategy: Optional attack strategy to get specific objectives for
222
+ :type strategy: str
223
+ :return: The attack objectives
224
+ :rtype: Any
225
+ """
226
+ # Create query parameters for the request
227
+ if application_scenario:
228
+ raise NotImplementedError("Application scenario is not supported yet")
229
+
230
+ params = {
231
+ "api-version": "2022-11-01-preview",
232
+ "riskTypes": ",".join(risk_categories),
233
+ "lang": "en", # Default to English
234
+ }
235
+
236
+ # Add strategy parameter if provided
237
+ if strategy:
238
+ params["strategy"] = strategy
239
+
240
+ try:
241
+ # Make the request using the existing get method
242
+ result = await self.get(self.attack_objectives_endpoint)
243
+ # from collections import defaultdict
244
+ # counts_by_risk = defaultdict(int)
245
+ # for item in result:
246
+ # target_harms = item.get("Metadata", {}).get("TargetHarms", [])
247
+ # if not target_harms:
248
+ # # No risk type specified
249
+ # counts_by_risk["empty"] += 1
250
+ # else:
251
+ # for harm in target_harms:
252
+ # # Use "empty" if the risk type field is missing
253
+ # risk_type = harm.get("RiskType", "") or "empty"
254
+ # counts_by_risk[risk_type] += 1
255
+ return result
256
+ except Exception:
257
+ # If the API fails or isn't implemented yet, return a mock response
258
+ # This is temporary until the API endpoint is fully implemented
259
+ return [
260
+ {
261
+ "metadata": {"lang": "en", "target_harms": [{"risk-type": "violence", "risk-subtype": ""}]},
262
+ "messages": [{"role": "user", "content": "Risky content"}],
263
+ "modality": "text",
264
+ "source": ["source"],
265
+ }
266
+ ]
@@ -2,25 +2,69 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- from typing import Optional
5
+ from typing import Dict, List, Optional, TypedDict, cast, Union
6
+ from ast import literal_eval
7
+ from typing_extensions import NotRequired
6
8
 
7
9
  from azure.ai.evaluation._model_configurations import AzureAIProject
10
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
11
+ from azure.ai.evaluation.simulator._adversarial_scenario import AdversarialScenario
8
12
 
9
13
  from ._rai_client import RAIClient
10
14
 
11
- CONTENT_HARM_TEMPLATES_COLLECTION_KEY = set(
12
- [
13
- "adv_qa",
14
- "adv_conversation",
15
- "adv_summarization",
16
- "adv_search",
17
- "adv_rewrite",
18
- "adv_content_gen_ungrounded",
19
- "adv_content_gen_grounded",
20
- "adv_content_protected_material",
21
- "adv_politics",
22
- ]
23
- )
15
+ CONTENT_HARM_TEMPLATES_COLLECTION_KEY = {
16
+ "adv_qa",
17
+ "adv_conversation",
18
+ "adv_summarization",
19
+ "adv_search",
20
+ "adv_rewrite",
21
+ "adv_content_gen_ungrounded",
22
+ "adv_content_gen_grounded",
23
+ "adv_content_protected_material",
24
+ "adv_politics",
25
+ }
26
+
27
+
28
+ class TemplateParameters(TypedDict):
29
+ """Parameters used in Templates
30
+
31
+ .. note::
32
+
33
+ This type is good enough to type check, but is incorrect. It's meant to represent a dictionary with a known
34
+ `metadata` key (Dict[str, str]), a known `ch_template_placeholder` key (str), and an unknown number of keys
35
+ that map to `str` values.
36
+
37
+ In typescript, this type would be spelled:
38
+
39
+ .. code-block:: typescript
40
+
41
+ type AdversarialTemplateParameters = {
42
+ [key: string]: string
43
+ ch_template_placeholder: string
44
+ metadata: {[index: string]: string} # Doesn't typecheck but gets the point across
45
+ }
46
+
47
+ At time of writing, this isn't possible to express with a TypedDict. TypedDicts must be "closed" in that
48
+ they fully specify all the keys they can contain.
49
+
50
+ `PEP 728 – TypedDict with Typed Extra Items <https://peps.python.org/pep-0728/>` is a proposal to support
51
+ this, but would only be available in Python 3.13 at the earliest.
52
+ """
53
+
54
+ metadata: Dict[str, str]
55
+ conversation_starter: str
56
+ ch_template_placeholder: str
57
+ group_of_people: NotRequired[str]
58
+ category: NotRequired[str]
59
+ target_population: NotRequired[str]
60
+ topic: NotRequired[str]
61
+ jailbreak_string: NotRequired[str]
62
+
63
+
64
+ class _CategorizedParameter(TypedDict):
65
+ parameters: List[TemplateParameters]
66
+ category: str
67
+ parameters_key: str
24
68
 
25
69
 
26
70
  class ContentHarmTemplatesUtils:
@@ -85,45 +129,53 @@ class AdversarialTemplate:
85
129
  :param template_parameters: The template parameters.
86
130
  """
87
131
 
88
- def __init__(self, template_name, text, context_key, template_parameters=None) -> None:
132
+ def __init__(
133
+ self,
134
+ template_name: str,
135
+ text: Optional[str],
136
+ context_key: List,
137
+ template_parameters: Optional[List[TemplateParameters]] = None,
138
+ ) -> None:
89
139
  self.text = text
90
140
  self.context_key = context_key
91
141
  self.template_name = template_name
92
- self.template_parameters = template_parameters
142
+ self.template_parameters = template_parameters or []
93
143
 
94
- def __str__(self):
144
+ def __str__(self) -> str:
95
145
  return "{{ch_template_placeholder}}"
96
146
 
97
147
 
98
148
  class AdversarialTemplateHandler:
99
149
  """
100
- Adversarial template handler constructor.
150
+ Initialize the AdversarialTemplateHandler.
101
151
 
102
- :param azure_ai_project: The Azure AI project.
103
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
104
- :param rai_client: The RAI client.
105
- :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
152
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
153
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
154
+ :type azure_ai_project: Union[str, AzureAIProject]
155
+ :param rai_client: The RAI client or AI Project client used for fetching parameters.
156
+ :type rai_client: Union[~azure.ai.evaluation.simulator._model_tools.RAIClient, ~azure.ai.evaluation._common.onedp._client.AIProjectClient]
106
157
  """
107
158
 
108
- def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
109
- self.cached_templates_source = {}
110
- # self.template_env = JinjaEnvironment(loader=JinjaFileSystemLoader(searchpath=template_dir))
159
+ def __init__(
160
+ self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]
161
+ ) -> None:
111
162
  self.azure_ai_project = azure_ai_project
112
- self.categorized_ch_parameters = None
163
+ self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None
113
164
  self.rai_client = rai_client
114
165
 
115
- async def _get_content_harm_template_collections(self, collection_key):
116
-
166
+ async def _get_content_harm_template_collections(self, collection_key: str) -> List[AdversarialTemplate]:
117
167
  if self.categorized_ch_parameters is None:
118
- categorized_parameters = {}
168
+ categorized_parameters: Dict[str, _CategorizedParameter] = {}
119
169
  util = ContentHarmTemplatesUtils
120
-
121
- parameters = await self.rai_client.get_contentharm_parameters()
170
+ if isinstance(self.rai_client, RAIClient):
171
+ parameters = await self.rai_client.get_contentharm_parameters()
172
+ elif isinstance(self.rai_client, AIProjectClient):
173
+ parameters = literal_eval(self.rai_client.red_teams.get_template_parameters())
122
174
 
123
175
  for k in parameters.keys():
124
176
  template_key = util.get_template_key(k)
125
177
  categorized_parameters[template_key] = {
126
- "parameters": parameters[k],
178
+ "parameters": cast(List[TemplateParameters], parameters[k]),
127
179
  "category": util.get_template_category(k),
128
180
  "parameters_key": k,
129
181
  }
@@ -131,17 +183,29 @@ class AdversarialTemplateHandler:
131
183
 
132
184
  template_category = collection_key.split("adv_")[-1]
133
185
 
186
+ # Handle both qa_enterprise and qa_documents mapping to qa
187
+ if template_category in ["qa_enterprise", "qa_documents"]:
188
+ template_category = "qa"
189
+
134
190
  plist = self.categorized_ch_parameters
135
191
  ch_templates = []
192
+
136
193
  for key, value in plist.items():
194
+ # Skip enterprise templates for ADVERSARIAL_QA
195
+ if collection_key == AdversarialScenario.ADVERSARIAL_QA.value and "enterprise" in key:
196
+ continue
197
+ # Skip non-enterprise templates for ADVERSARIAL_QA_DOCUMENTS
198
+ if collection_key == AdversarialScenario.ADVERSARIAL_QA_DOCUMENTS.value and "enterprise" not in key:
199
+ continue
200
+
137
201
  if value["category"] == template_category:
138
202
  params = value["parameters"]
139
203
  for p in params:
140
204
  p.update({"ch_template_placeholder": "{{ch_template_placeholder}}"})
141
205
 
142
206
  template = AdversarialTemplate(template_name=key, text=None, context_key=[], template_parameters=params)
143
-
144
207
  ch_templates.append(template)
208
+
145
209
  return ch_templates
146
210
 
147
211
  def get_template(self, template_name: str) -> Optional[AdversarialTemplate]:
@@ -12,6 +12,8 @@ from abc import ABC, abstractmethod
12
12
  from collections import deque
13
13
  from typing import Deque, Dict, List, Optional, Union
14
14
  from urllib.parse import urlparse
15
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
16
+ from ._rai_client import RAIClient
15
17
 
16
18
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
19
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
@@ -49,10 +51,10 @@ class LLMBase(ABC):
49
51
  Base class for all LLM models.
50
52
  """
51
53
 
52
- def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = {}):
54
+ def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[Dict[str, str]] = None):
53
55
  self.endpoint_url = endpoint_url
54
56
  self.name = name
55
- self.additional_headers = additional_headers
57
+ self.additional_headers = additional_headers or {}
56
58
  self.logger = logging.getLogger(repr(self))
57
59
 
58
60
  # Metric tracking
@@ -78,7 +80,7 @@ class LLMBase(ABC):
78
80
  async def get_completion(
79
81
  self,
80
82
  prompt: str,
81
- session: AsyncHttpPipeline,
83
+ session: Union[AsyncHttpPipeline, AIProjectClient],
82
84
  **request_params,
83
85
  ) -> dict:
84
86
  """
@@ -100,7 +102,7 @@ class LLMBase(ABC):
100
102
  async def get_all_completions(
101
103
  self,
102
104
  prompts: List[str],
103
- session: AsyncHttpPipeline,
105
+ session: Union[AsyncHttpPipeline, AIProjectClient],
104
106
  api_call_max_parallel_count: int,
105
107
  api_call_delay_seconds: float,
106
108
  request_error_rate_threshold: float,
@@ -120,7 +122,7 @@ class LLMBase(ABC):
120
122
  async def get_conversation_completion(
121
123
  self,
122
124
  messages: List[dict],
123
- session: AsyncHttpPipeline,
125
+ session: Union[AsyncHttpPipeline, AIProjectClient],
124
126
  role: str,
125
127
  **request_params,
126
128
  ) -> dict:
@@ -208,7 +210,7 @@ class OpenAICompletionsModel(LLMBase):
208
210
  *,
209
211
  endpoint_url: str,
210
212
  name: str = "OpenAICompletionsModel",
211
- additional_headers: Optional[dict] = {},
213
+ additional_headers: Optional[Dict[str, str]] = None,
212
214
  api_version: Optional[str] = "2023-03-15-preview",
213
215
  token_manager: APITokenManager,
214
216
  azureml_model_deployment: Optional[str] = None,
@@ -220,7 +222,7 @@ class OpenAICompletionsModel(LLMBase):
220
222
  frequency_penalty: Optional[float] = 0,
221
223
  presence_penalty: Optional[float] = 0,
222
224
  stop: Optional[Union[List[str], str]] = None,
223
- image_captions: Dict[str, str] = {},
225
+ image_captions: Optional[Dict[str, str]] = None,
224
226
  images_dir: Optional[str] = None, # Note: unused, kept for class compatibility
225
227
  ):
226
228
  super().__init__(endpoint_url=endpoint_url, name=name, additional_headers=additional_headers)
@@ -234,7 +236,7 @@ class OpenAICompletionsModel(LLMBase):
234
236
  self.n = n
235
237
  self.frequency_penalty = frequency_penalty
236
238
  self.presence_penalty = presence_penalty
237
- self.image_captions = image_captions
239
+ self.image_captions = image_captions or {}
238
240
 
239
241
  # Default stop to end token if not provided
240
242
  if not stop:
@@ -263,7 +265,7 @@ class OpenAICompletionsModel(LLMBase):
263
265
  def get_model_params(self):
264
266
  return {param: getattr(self, param) for param in self.model_param_names if getattr(self, param) is not None}
265
267
 
266
- def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]:
268
+ def format_request_data(self, prompt: Dict[str, str], **request_params) -> Dict[str, str]: # type: ignore[override]
267
269
  """
268
270
  Format the request data for the OpenAI API.
269
271
  """
@@ -274,7 +276,7 @@ class OpenAICompletionsModel(LLMBase):
274
276
  async def get_conversation_completion(
275
277
  self,
276
278
  messages: List[dict],
277
- session: AsyncHttpPipeline,
279
+ session: Union[AsyncHttpPipeline, AIProjectClient],
278
280
  role: str = "assistant",
279
281
  **request_params,
280
282
  ) -> dict:
@@ -304,7 +306,7 @@ class OpenAICompletionsModel(LLMBase):
304
306
  async def get_all_completions( # type: ignore[override]
305
307
  self,
306
308
  prompts: List[Dict[str, str]],
307
- session: AsyncHttpPipeline,
309
+ session: Union[AsyncHttpPipeline, AIProjectClient],
308
310
  api_call_max_parallel_count: int = 1,
309
311
  api_call_delay_seconds: float = 0.1,
310
312
  request_error_rate_threshold: float = 0.5,
@@ -328,7 +330,7 @@ class OpenAICompletionsModel(LLMBase):
328
330
  # Format prompts and tag with index
329
331
  request_datas: List[Dict] = []
330
332
  for idx, prompt in enumerate(prompts):
331
- prompt: Dict[str, str] = self.format_request_data(prompt, **request_params)
333
+ prompt = self.format_request_data(prompt, **request_params)
332
334
  prompt[self.prompt_idx_key] = idx # type: ignore[assignment]
333
335
  request_datas.append(prompt)
334
336
 
@@ -372,7 +374,7 @@ class OpenAICompletionsModel(LLMBase):
372
374
  self,
373
375
  request_datas: List[dict],
374
376
  output_collector: List,
375
- session: AsyncHttpPipeline,
377
+ session: Union[AsyncHttpPipeline, AIProjectClient],
376
378
  api_call_delay_seconds: float = 0.1,
377
379
  request_error_rate_threshold: float = 0.5,
378
380
  ) -> None:
@@ -433,7 +435,7 @@ class OpenAICompletionsModel(LLMBase):
433
435
 
434
436
  async def request_api(
435
437
  self,
436
- session: AsyncHttpPipeline,
438
+ session: Union[AsyncHttpPipeline, AIProjectClient],
437
439
  request_data: dict,
438
440
  ) -> dict:
439
441
  """
@@ -447,7 +449,7 @@ class OpenAICompletionsModel(LLMBase):
447
449
 
448
450
  self._log_request(request_data)
449
451
 
450
- token = await self.token_manager.get_token()
452
+ token = self.token_manager.get_token()
451
453
 
452
454
  headers = {
453
455
  "Content-Type": "application/json",
@@ -476,11 +478,12 @@ class OpenAICompletionsModel(LLMBase):
476
478
  time_start = time.time()
477
479
  full_response = None
478
480
 
479
- response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
480
-
481
- response.raise_for_status()
482
-
483
- response_data = response.json()
481
+ if isinstance(session, AIProjectClient):
482
+ response_data = session.red_teams.submit_simulation(request_data, headers, params)
483
+ else:
484
+ response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
485
+ response.raise_for_status()
486
+ response_data = response.json()
484
487
 
485
488
  self.logger.info(f"Response: {response_data}")
486
489
 
@@ -522,8 +525,8 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
522
525
  formats the prompt for chat completion.
523
526
  """
524
527
 
525
- def __init__(self, name="OpenAIChatCompletionsModel", *args, **kwargs):
526
- super().__init__(name=name, *args, **kwargs)
528
+ def __init__(self, name="OpenAIChatCompletionsModel", **kwargs):
529
+ super().__init__(name=name, **kwargs)
527
530
 
528
531
  def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override]
529
532
  request_data = {"messages": messages, **self.get_model_params()}
@@ -533,7 +536,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
533
536
  async def get_conversation_completion(
534
537
  self,
535
538
  messages: List[dict],
536
- session: AsyncHttpPipeline,
539
+ session: Union[AsyncHttpPipeline, AIProjectClient],
537
540
  role: str = "assistant",
538
541
  **request_params,
539
542
  ) -> dict:
@@ -544,7 +547,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
544
547
  ----------
545
548
  messages: List of messages to query the model with.
546
549
  Expected format: [{"role": "user", "content": "Hello!"}, ...]
547
- session: AsyncHttpPipeline object to query the model with.
550
+ session: Union[AsyncHttpPipeline, AIProjectClient] object to query the model with.
548
551
  role: Not used for this model, since it is a chat model.
549
552
  request_params: Additional parameters to pass to the model.
550
553
  """
@@ -560,7 +563,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
560
563
  async def get_completion(
561
564
  self,
562
565
  prompt: str,
563
- session: AsyncHttpPipeline,
566
+ session: Union[AsyncHttpPipeline, AIProjectClient],
564
567
  **request_params,
565
568
  ) -> dict:
566
569
  """
@@ -569,7 +572,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
569
572
  Parameters
570
573
  ----------
571
574
  prompt: Prompt str to query model with.
572
- session: AsyncHttpPipeline object to use for the request.
575
+ session: Union[AsyncHttpPipeline, AIProjectClient] object to use for the request.
573
576
  **request_params: Additional parameters to pass to the request.
574
577
  """
575
578
  messages = [{"role": "system", "content": prompt}]
@@ -583,7 +586,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
583
586
  async def get_all_completions(
584
587
  self,
585
588
  prompts: List[str], # type: ignore[override]
586
- session: AsyncHttpPipeline,
589
+ session: Union[AsyncHttpPipeline, AIProjectClient],
587
590
  api_call_max_parallel_count: int = 1,
588
591
  api_call_delay_seconds: float = 0.1,
589
592
  request_error_rate_threshold: float = 0.5,
@@ -3,11 +3,6 @@ name: TaskSimulatorQueryResponse
3
3
  description: Gets queries and responses from a blob of text
4
4
  model:
5
5
  api: chat
6
- configuration:
7
- type: azure_openai
8
- azure_deployment: ${env:AZURE_DEPLOYMENT}
9
- api_key: ${env:AZURE_OPENAI_API_KEY}
10
- azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
11
6
  parameters:
12
7
  temperature: 0.0
13
8
  top_p: 1.0
@@ -33,15 +28,16 @@ Answer must not be more than 5 words
33
28
  Answer must be picked from Text as is
34
29
  Question should be as descriptive as possible and must include as much context as possible from Text
35
30
  Output must always have the provided number of QnAs
36
- Output must be in JSON format
31
+ Output must be in JSON format.
32
+ Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
37
33
  Text:
38
34
  <|text_start|>
39
35
  On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
40
36
  Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
41
37
  <|text_end|>
42
38
  Output with 5 QnAs:
43
- [
44
- {
39
+ {
40
+ "qna":[{
45
41
  "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
46
42
  "r": "January 24, 1984"
47
43
  },
@@ -60,8 +56,8 @@ Output with 5 QnAs:
60
56
  {
61
57
  "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
62
58
  "r": "6%"
63
- }
64
- ]
59
+ }]
60
+ }
65
61
  Text:
66
62
  <|text_start|>
67
63
  {{ text }}
@@ -3,10 +3,6 @@ name: TaskSimulatorWithPersona
3
3
  description: Simulates a user to complete a conversation
4
4
  model:
5
5
  api: chat
6
- configuration:
7
- type: azure_openai
8
- azure_deployment: ${env:AZURE_DEPLOYMENT}
9
- azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
10
6
  parameters:
11
7
  temperature: 0.0
12
8
  top_p: 1.0
@@ -20,6 +16,9 @@ inputs:
20
16
  type: string
21
17
  conversation_history:
22
18
  type: dict
19
+ action:
20
+ type: string
21
+ default: continue the conversation and make sure the task is completed by asking relevant questions
23
22
 
24
23
  ---
25
24
  system:
@@ -29,8 +28,10 @@ Output must be in JSON format
29
28
  Here's a sample output:
30
29
  {
31
30
  "content": "Here is my follow-up question.",
32
- "user": "user"
31
+ "role": "user"
33
32
  }
34
33
 
35
34
  Output with a json object that continues the conversation, given the conversation history:
36
35
  {{ conversation_history }}
36
+
37
+ {{ action }}