azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299)
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  # flake8: noqa
2
- # pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611
2
+ # pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0114,R0913,E0702,R0903,C0411
3
3
  # ---------------------------------------------------------
4
4
  # Copyright (c) Microsoft Corporation. All rights reserved.
5
5
  # ---------------------------------------------------------
@@ -7,19 +7,22 @@ import asyncio
7
7
  import importlib.resources as pkg_resources
8
8
  import json
9
9
  import os
10
+ import random
10
11
  import re
11
12
  import warnings
12
- from typing import Any, Callable, Dict, List, Optional, Union
13
+ from typing import Any, Callable, Dict, List, Optional, Union, Tuple
13
14
 
14
- from promptflow.client import load_flow
15
- from promptflow.core import AzureOpenAIModelConfiguration, Flow
15
+ from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
16
16
  from tqdm import tqdm
17
17
 
18
- from .._user_agent import USER_AGENT
19
- from ._conversation.constants import ConversationRole
20
- from ._helpers import ConversationHistory, Turn, experimental
18
+ from azure.ai.evaluation._common._experimental import experimental
19
+ from azure.ai.evaluation._common.utils import construct_prompty_model_config
20
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
21
21
 
22
- # from ._tracing import monitor_task_simulator
22
+ from .._exceptions import ErrorBlame, ErrorCategory, EvaluationException
23
+ from .._user_agent import UserAgentSingleton
24
+ from ._conversation.constants import ConversationRole
25
+ from ._helpers import ConversationHistory, Turn
23
26
  from ._utils import JsonLineChatProtocol
24
27
 
25
28
 
@@ -27,53 +30,82 @@ from ._utils import JsonLineChatProtocol
27
30
  class Simulator:
28
31
  """
29
32
  Simulator for generating synthetic conversations.
33
+
34
+ :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration.
35
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration]
36
+ :raises ValueError: If the model_config does not contain the required keys or any value is None.
37
+
38
+ .. admonition:: Example:
39
+
40
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
41
+ :start-after: [START nonadversarial_simulator]
42
+ :end-before: [END nonadversarial_simulator]
43
+ :language: python
44
+ :dedent: 8
45
+ :caption: Run a Simulator for 2 queries and 4 conversation turns.
30
46
  """
31
47
 
32
- def __init__(self, azure_ai_project: Dict[str, Any], credential: Optional[Any] = None):
33
- """
34
- Initializes the task simulator with a project scope.
48
+ def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
49
+ self._validate_model_config(model_config)
50
+ self.model_config = model_config
51
+ if "api_version" not in self.model_config:
52
+ self.model_config["api_version"] = "2024-06-01" # type: ignore
35
53
 
36
- :param azure_ai_project: A dictionary defining the scope of the project, including keys such as
37
- "subscription_id", "resource_group_name", and "project_name".
38
- :param credential: Azure credentials to authenticate the user. If None, the default credentials are used.
39
- :paramtype credential: Optional[Any]
40
- :raises ValueError: If the azure_ai_project does not contain the required keys or any value is None.
41
- """
42
- self._validate_project_config(azure_ai_project)
43
- self.azure_ai_project = azure_ai_project
44
- self.azure_ai_project["api_version"] = "2024-02-15-preview"
45
- self.credential = credential
54
+ @staticmethod
55
+ def __user_agent() -> str:
56
+ return f"{UserAgentSingleton().value} (type=simulator; subtype=Simulator)"
46
57
 
47
58
  @staticmethod
48
- def _validate_project_config(azure_ai_project: Dict[str, Any]):
59
+ def _validate_model_config(model_config: Any):
49
60
  """
50
- Validates the azure_ai_project configuration to ensure all required keys are present and have non-None values.
61
+ Validates the model_config to ensure all required keys are present and have non-None values.
62
+ If 'type' is not specified, it will attempt to infer the type based on the keys present.
51
63
 
52
- :param azure_ai_project: The Azure AI project configuration dictionary.
53
- :type azure_ai_project: Dict[str, Any]
64
+ :param model_config: The model configuration dictionary.
65
+ :type model_config: Dict[str, Any]
54
66
  :raises ValueError: If required keys are missing or any of the values are None.
55
67
  """
56
- required_keys = ["subscription_id", "resource_group_name", "project_name"]
57
- if not all(key in azure_ai_project for key in required_keys):
58
- raise ValueError(f"azure_ai_project must contain keys: {', '.join(required_keys)}")
59
- if not all(azure_ai_project[key] for key in required_keys):
60
- raise ValueError("subscription_id, resource_group_name, and project_name must not be None")
68
+ # Attempt to infer 'type' if not provided
69
+ if "type" not in model_config:
70
+ if "azure_deployment" in model_config and "azure_endpoint" in model_config:
71
+ model_config["type"] = "azure_openai"
72
+ elif "model" in model_config:
73
+ model_config["type"] = "openai"
74
+ else:
75
+ raise ValueError(
76
+ "Unable to infer 'type' from model_config. Please specify 'type' as 'azure_openai' or 'openai'."
77
+ )
78
+
79
+ if model_config["type"] == "azure_openai":
80
+ required_keys = ["azure_deployment", "azure_endpoint"]
81
+ elif model_config["type"] == "openai":
82
+ required_keys = ["api_key", "model"]
83
+ else:
84
+ raise ValueError("model_config 'type' must be 'azure_openai' or 'openai'.")
85
+
86
+ missing_keys = [key for key in required_keys if key not in model_config]
87
+ if missing_keys:
88
+ raise ValueError(f"model_config is missing required keys: {', '.join(missing_keys)}")
89
+ none_keys = [key for key in required_keys if model_config.get(key) is None]
90
+ if none_keys:
91
+ raise ValueError(f"The following keys in model_config must not be None: {', '.join(none_keys)}")
61
92
 
62
- # @monitor_task_simulator
63
93
  async def __call__(
64
94
  self,
65
95
  *,
66
96
  target: Callable,
67
97
  max_conversation_turns: int = 5,
68
- tasks: List[Dict] = [],
98
+ tasks: List[str] = [],
69
99
  text: str = "",
70
100
  num_queries: int = 5,
71
101
  query_response_generating_prompty: Optional[str] = None,
72
102
  user_simulator_prompty: Optional[str] = None,
73
103
  api_call_delay_sec: float = 1,
74
- query_response_generating_prompty_kwargs: Dict[str, Any] = {},
75
- user_simulator_prompty_kwargs: Dict[str, Any] = {},
76
- conversation_turns: List[List[str]] = [],
104
+ query_response_generating_prompty_options: Dict[str, Any] = {},
105
+ user_simulator_prompty_options: Dict[str, Any] = {},
106
+ conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [],
107
+ concurrent_async_tasks: int = 5,
108
+ randomization_seed: Optional[int] = None,
77
109
  **kwargs,
78
110
  ) -> List[JsonLineChatProtocol]:
79
111
  """
@@ -95,12 +127,18 @@ class Simulator:
95
127
  :paramtype user_simulator_prompty: Optional[str]
96
128
  :keyword api_call_delay_sec: Delay in seconds between API calls.
97
129
  :paramtype api_call_delay_sec: float
98
- :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
99
- :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
100
- :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
101
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
130
+ :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty.
131
+ :paramtype query_response_generating_prompty_options: Dict[str, Any]
132
+ :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
133
+ :paramtype user_simulator_prompty_options: Dict[str, Any]
102
134
  :keyword conversation_turns: Predefined conversation turns to simulate.
103
- :paramtype conversation_turns: List[List[str]]
135
+ :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]]
136
+ :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
137
+ Defaults to 5.
138
+ :paramtype concurrent_async_tasks: int
139
+ :keyword randomization_seed: The seed used to randomize task/query order. If unset, the system's
140
+ default seed is used. Defaults to None.
141
+ :paramtype randomization_seed: Optional[int]
104
142
  :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
105
143
  :rtype: List[JsonLineChatProtocol]
106
144
 
@@ -109,79 +147,75 @@ class Simulator:
109
147
 
110
148
  Modes:
111
149
  - Task-Free Mode: When only num_queries is specified and tasks is not, the method generates num_queries x max_conversation_turns lines of simulated data grounded in the context of the text.
112
- - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines are simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used.
150
+ - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines will be simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used.
113
151
  - Conversation Starter Mode: When conversation_turns are specified, the method starts each conversation with the user-specified queries and then follows the conversation history for the remaining turns.
114
152
  """
115
153
  if conversation_turns and (text or tasks):
116
154
  raise ValueError("Cannot specify both conversation_turns and text/tasks")
117
155
 
118
- if num_queries > len(tasks):
156
+ if text and num_queries > len(tasks):
119
157
  warnings.warn(
120
158
  f"You have specified 'num_queries' > len('tasks') ({num_queries} > {len(tasks)}). "
121
159
  f"All tasks will be used for generation and the remaining {num_queries - len(tasks)} lines will be simulated in task-free mode"
122
160
  )
123
- elif num_queries < len(tasks):
161
+ elif text and num_queries < len(tasks):
124
162
  warnings.warn(
125
163
  f"You have specified 'num_queries' < len('tasks') ({num_queries} < {len(tasks)}). "
126
164
  f"Only the first {num_queries} lines of the specified tasks will be simulated."
127
165
  )
128
- num_queries = min(num_queries, len(tasks))
129
- max_conversation_turns *= 2 # account for both user and assistant turns
130
166
 
131
- prompty_model_config = self._build_prompty_model_config()
167
+ # Apply randomization to tasks if seed is provided
168
+ if randomization_seed is not None and tasks:
169
+ # Create a local random instance to avoid polluting global state
170
+ local_random = random.Random(randomization_seed)
171
+ tasks = tasks.copy() # Don't modify the original list
172
+ local_random.shuffle(tasks)
132
173
 
174
+ max_conversation_turns *= 2 # account for both user and assistant turns
175
+
176
+ prompty_model_config = self.model_config
133
177
  if conversation_turns:
134
178
  return await self._simulate_with_predefined_turns(
135
179
  target=target,
136
180
  max_conversation_turns=max_conversation_turns,
137
181
  conversation_turns=conversation_turns,
138
182
  user_simulator_prompty=user_simulator_prompty,
139
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
183
+ user_simulator_prompty_options=user_simulator_prompty_options,
140
184
  api_call_delay_sec=api_call_delay_sec,
141
185
  prompty_model_config=prompty_model_config,
186
+ concurrent_async_tasks=concurrent_async_tasks,
142
187
  )
143
188
 
144
189
  query_responses = await self._generate_query_responses(
145
190
  text=text,
146
191
  num_queries=num_queries,
147
192
  query_response_generating_prompty=query_response_generating_prompty,
148
- query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
193
+ query_response_generating_prompty_options=query_response_generating_prompty_options,
149
194
  prompty_model_config=prompty_model_config,
150
195
  **kwargs,
151
196
  )
152
-
153
197
  return await self._create_conversations_from_query_responses(
154
198
  query_responses=query_responses,
155
199
  max_conversation_turns=max_conversation_turns,
156
200
  tasks=tasks,
157
201
  user_simulator_prompty=user_simulator_prompty,
158
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
202
+ user_simulator_prompty_options=user_simulator_prompty_options,
159
203
  target=target,
160
204
  api_call_delay_sec=api_call_delay_sec,
205
+ text=text,
161
206
  )
162
207
 
163
- def _build_prompty_model_config(self) -> Dict[str, Any]:
164
- """
165
- Constructs the configuration for the prompty model.
166
-
167
- :return: A dictionary containing the prompty model configuration, including API version and user agent headers if applicable.
168
- :rtype: Dict[str, Any]
169
- """
170
- config = {"configuration": self.azure_ai_project}
171
- if USER_AGENT and isinstance(self.azure_ai_project, AzureOpenAIModelConfiguration):
172
- config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}})
173
- return config
174
-
175
208
  async def _simulate_with_predefined_turns(
176
209
  self,
177
210
  *,
178
211
  target: Callable,
179
212
  max_conversation_turns: int,
180
- conversation_turns: List[List[str]],
213
+ conversation_turns: List[List[Union[str, Dict[str, Any]]]],
181
214
  user_simulator_prompty: Optional[str],
182
- user_simulator_prompty_kwargs: Dict[str, Any],
215
+ user_simulator_prompty_options: Dict[str, Any],
183
216
  api_call_delay_sec: float,
184
- prompty_model_config: Dict[str, Any],
217
+ prompty_model_config: Any,
218
+ concurrent_async_tasks: int,
185
219
  ) -> List[JsonLineChatProtocol]:
186
220
  """
187
221
  Simulates conversations using predefined conversation turns.
@@ -191,54 +225,81 @@ class Simulator:
191
225
  :keyword max_conversation_turns: Maximum number of turns for the simulation.
192
226
  :paramtype max_conversation_turns: int
193
227
  :keyword conversation_turns: A list of predefined conversation turns.
194
- :paramtype conversation_turns: List[List[str]]
228
+ :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]]
195
229
  :keyword user_simulator_prompty: Path to the user simulator prompty file.
196
230
  :paramtype user_simulator_prompty: Optional[str]
197
- :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
198
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
231
+ :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
232
+ :paramtype user_simulator_prompty_options: Dict[str, Any]
199
233
  :keyword api_call_delay_sec: Delay in seconds between API calls.
200
234
  :paramtype api_call_delay_sec: float
201
235
  :keyword prompty_model_config: The configuration for the prompty model.
202
- :paramtype prompty_model_config: Dict[str, Any]
236
+ :paramtype prompty_model_config: Any
237
+ :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
238
+ :paramtype concurrent_async_tasks: int
203
239
  :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
204
240
  :rtype: List[JsonLineChatProtocol]
205
241
  """
206
- simulated_conversations = []
207
242
  progress_bar = tqdm(
208
243
  total=int(len(conversation_turns) * (max_conversation_turns / 2)),
209
244
  desc="Simulating with predefined conversation turns: ",
210
245
  ncols=100,
211
246
  unit="messages",
212
247
  )
213
-
214
- for simulation in conversation_turns:
215
- current_simulation = ConversationHistory()
216
- for simulated_turn in simulation:
217
- user_turn = Turn(role=ConversationRole.USER, content=simulated_turn)
218
- current_simulation.add_to_history(user_turn)
219
- assistant_response = await self._get_target_response(
220
- target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
221
- )
222
- assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
223
- current_simulation.add_to_history(assistant_turn)
224
- progress_bar.update(1) # Update progress bar for both user and assistant turns
225
-
226
- if len(current_simulation) < max_conversation_turns:
227
- await self._extend_conversation_with_simulator(
228
- current_simulation=current_simulation,
229
- max_conversation_turns=max_conversation_turns,
230
- user_simulator_prompty=user_simulator_prompty,
231
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
232
- api_call_delay_sec=api_call_delay_sec,
233
- prompty_model_config=prompty_model_config,
234
- target=target,
235
- progress_bar=progress_bar,
248
+ semaphore = asyncio.Semaphore(concurrent_async_tasks)
249
+ progress_bar_lock = asyncio.Lock()
250
+
251
+ async def run_simulation(simulation: List[Union[str, Dict[str, Any]]]) -> JsonLineChatProtocol:
252
+ async with semaphore:
253
+ current_simulation = ConversationHistory()
254
+ for simulated_turn in simulation:
255
+ if isinstance(simulated_turn, str):
256
+ user_turn = Turn(role=ConversationRole.USER, content=simulated_turn)
257
+ elif isinstance(simulated_turn, dict):
258
+ user_turn = Turn(
259
+ role=ConversationRole.USER,
260
+ content=str(simulated_turn.get("content")),
261
+ context=str(simulated_turn.get("context")),
262
+ )
263
+ else:
264
+ raise ValueError(
265
+ "Each simulated turn must be a string or a dict with 'content' and 'context' keys"
266
+ )
267
+ current_simulation.add_to_history(user_turn)
268
+ assistant_response, assistant_context = await self._get_target_response(
269
+ target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
270
+ )
271
+ assistant_turn = Turn(
272
+ role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context
273
+ )
274
+ current_simulation.add_to_history(assistant_turn)
275
+ async with progress_bar_lock:
276
+ progress_bar.update(1)
277
+
278
+ if len(current_simulation) < max_conversation_turns:
279
+ await self._extend_conversation_with_simulator(
280
+ current_simulation=current_simulation,
281
+ max_conversation_turns=max_conversation_turns,
282
+ user_simulator_prompty=user_simulator_prompty,
283
+ user_simulator_prompty_options=user_simulator_prompty_options,
284
+ api_call_delay_sec=api_call_delay_sec,
285
+ prompty_model_config=prompty_model_config,
286
+ target=target,
287
+ progress_bar=progress_bar,
288
+ progress_bar_lock=progress_bar_lock,
289
+ )
290
+ return JsonLineChatProtocol(
291
+ {
292
+ "messages": current_simulation.to_list(),
293
+ "finish_reason": ["stop"],
294
+ "context": {},
295
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
296
+ }
236
297
  )
237
298
 
238
- simulated_conversations.append(current_simulation.to_list())
239
-
299
+ tasks = [asyncio.create_task(run_simulation(simulation)) for simulation in conversation_turns]
300
+ results = await asyncio.gather(*tasks)
240
301
  progress_bar.close()
241
- return simulated_conversations
302
+ return results
242
303
 
243
304
  async def _extend_conversation_with_simulator(
244
305
  self,
@@ -246,11 +307,12 @@ class Simulator:
246
307
  current_simulation: ConversationHistory,
247
308
  max_conversation_turns: int,
248
309
  user_simulator_prompty: Optional[str],
249
- user_simulator_prompty_kwargs: Dict[str, Any],
310
+ user_simulator_prompty_options: Dict[str, Any],
250
311
  api_call_delay_sec: float,
251
312
  prompty_model_config: Dict[str, Any],
252
313
  target: Callable,
253
314
  progress_bar: tqdm,
315
+ progress_bar_lock: asyncio.Lock,
254
316
  ):
255
317
  """
256
318
  Extends an ongoing conversation using a user simulator until the maximum number of turns is reached.
@@ -261,8 +323,8 @@ class Simulator:
261
323
  :paramtype max_conversation_turns: int,
262
324
  :keyword user_simulator_prompty: Path to the user simulator prompty file.
263
325
  :paramtype user_simulator_prompty: Optional[str],
264
- :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
265
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any],
326
+ :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
327
+ :paramtype user_simulator_prompty_options: Dict[str, Any],
266
328
  :keyword api_call_delay_sec: Delay in seconds between API calls.
267
329
  :paramtype api_call_delay_sec: float,
268
330
  :keyword prompty_model_config: The configuration for the prompty model.
@@ -271,68 +333,92 @@ class Simulator:
271
333
  :paramtype target: Callable,
272
334
  :keyword progress_bar: Progress bar for tracking simulation progress.
273
335
  :paramtype progress_bar: tqdm,
336
+ :keyword progress_bar_lock: Lock for updating the progress bar safely.
337
+ :paramtype progress_bar_lock: asyncio.Lock
274
338
  """
275
339
  user_flow = self._load_user_simulation_flow(
276
- user_simulator_prompty=user_simulator_prompty,
340
+ user_simulator_prompty=user_simulator_prompty, # type: ignore
277
341
  prompty_model_config=prompty_model_config,
278
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
342
+ user_simulator_prompty_options=user_simulator_prompty_options,
279
343
  )
280
344
 
281
345
  while len(current_simulation) < max_conversation_turns:
282
- user_response_content = user_flow(
283
- task="Continue the conversation", conversation_history=current_simulation.to_list()
346
+ user_response_content = await user_flow(
347
+ task="Continue the conversation",
348
+ conversation_history=current_simulation.to_context_free_list(),
349
+ **user_simulator_prompty_options,
284
350
  )
285
351
  user_response = self._parse_prompty_response(response=user_response_content)
286
352
  user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
287
353
  current_simulation.add_to_history(user_turn)
288
354
  await asyncio.sleep(api_call_delay_sec)
289
- assistant_response = await self._get_target_response(
355
+ assistant_response, assistant_context = await self._get_target_response(
290
356
  target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
291
357
  )
292
- assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
358
+ assistant_turn = Turn(
359
+ role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context
360
+ )
293
361
  current_simulation.add_to_history(assistant_turn)
294
- progress_bar.update(1)
362
+ async with progress_bar_lock:
363
+ progress_bar.update(1)
295
364
 
296
365
  def _load_user_simulation_flow(
297
366
  self,
298
367
  *,
299
- user_simulator_prompty: Union[str, os.PathLike],
368
+ user_simulator_prompty: Optional[Union[str, os.PathLike]],
300
369
  prompty_model_config: Dict[str, Any],
301
- user_simulator_prompty_kwargs: Dict[str, Any],
302
- ) -> Flow:
370
+ user_simulator_prompty_options: Dict[str, Any],
371
+ ) -> "AsyncPrompty": # type: ignore
303
372
  """
304
373
  Loads the flow for simulating user interactions.
305
374
 
306
375
  :keyword user_simulator_prompty: Path to the user simulator prompty file.
307
- :paramtype user_simulator_prompty: Union[str, os.PathLike]
376
+ :paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]]
308
377
  :keyword prompty_model_config: The configuration for the prompty model.
309
378
  :paramtype prompty_model_config: Dict[str, Any]
310
- :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
311
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
379
+ :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
380
+ :paramtype user_simulator_prompty_options: Dict[str, Any]
312
381
  :return: The loaded flow for simulating user interactions.
313
- :rtype: Flow
382
+ :rtype: AsyncPrompty
314
383
  """
315
384
  if not user_simulator_prompty:
316
385
  package = "azure.ai.evaluation.simulator._prompty"
317
386
  resource_name = "task_simulate.prompty"
318
387
  try:
319
388
  # Access the resource as a file path
389
+ # pylint: disable=deprecated-method
320
390
  with pkg_resources.path(package, resource_name) as prompty_path:
321
- return load_flow(source=str(prompty_path), model=prompty_model_config)
391
+ prompty_model_config = construct_prompty_model_config(
392
+ model_config=prompty_model_config, # type: ignore
393
+ default_api_version="2024-06-01",
394
+ user_agent=self.__user_agent(),
395
+ )
396
+ return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore
322
397
  except FileNotFoundError as e:
323
- raise f"Flow path for {resource_name} does not exist in package {package}." from e
324
- return load_flow(
398
+ msg = f"Flow path for {resource_name} does not exist in package {package}."
399
+ raise EvaluationException(
400
+ message=msg,
401
+ internal_message=msg,
402
+ error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
403
+ blame=ErrorBlame.USER_ERROR,
404
+ ) from e
405
+ prompty_model_config = construct_prompty_model_config(
406
+ model_config=prompty_model_config, # type: ignore
407
+ default_api_version="2024-06-01",
408
+ user_agent=self.__user_agent(),
409
+ )
410
+ return AsyncPrompty.load(
325
411
  source=user_simulator_prompty,
326
412
  model=prompty_model_config,
327
- **user_simulator_prompty_kwargs,
328
- )
413
+ **user_simulator_prompty_options,
414
+ ) # type: ignore
329
415
 
330
416
  def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]:
331
417
  """
332
418
  Parses the response from the prompty execution.
333
419
 
334
420
  :keyword response: The raw response from the prompty.
335
- :paramtype str: str
421
+ :paramtype response: str
336
422
  :return: A dictionary representing the parsed response content.
337
423
  :rtype: Dict[str, Any]
338
424
  :raises ValueError: If the response cannot be parsed.
@@ -372,8 +458,8 @@ class Simulator:
372
458
  text: str,
373
459
  num_queries: int,
374
460
  query_response_generating_prompty: Optional[str],
375
- query_response_generating_prompty_kwargs: Dict[str, Any],
376
- prompty_model_config: Dict[str, Any],
461
+ query_response_generating_prompty_options: Dict[str, Any],
462
+ prompty_model_config: Any,
377
463
  **kwargs,
378
464
  ) -> List[Dict[str, str]]:
379
465
  """
@@ -385,25 +471,32 @@ class Simulator:
385
471
  :paramtype num_queries: int
386
472
  :keyword query_response_generating_prompty: Path to the query response generating prompty file.
387
473
  :paramtype query_response_generating_prompty: Optional[str]
388
- :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
389
- :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
474
+ :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty.
475
+ :paramtype query_response_generating_prompty_options: Dict[str, Any]
390
476
  :keyword prompty_model_config: The configuration for the prompty model.
391
- :paramtype prompty_model_config: Dict[str, Any]
477
+ :paramtype prompty_model_config: Any
392
478
  :return: A list of query-response dictionaries.
393
479
  :rtype: List[Dict[str, str]]
394
480
  :raises RuntimeError: If an error occurs during query generation.
395
481
  """
396
482
  query_flow = self._load_query_generation_flow(
397
- query_response_generating_prompty=query_response_generating_prompty,
483
+ query_response_generating_prompty=query_response_generating_prompty, # type: ignore
398
484
  prompty_model_config=prompty_model_config,
399
- query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
485
+ query_response_generating_prompty_options=query_response_generating_prompty_options,
400
486
  )
401
-
402
487
  try:
403
- query_responses = query_flow(text=text, num_queries=num_queries)
488
+ query_responses = await query_flow(text=text, num_queries=num_queries)
404
489
  if isinstance(query_responses, dict):
405
490
  keys = list(query_responses.keys())
406
491
  return query_responses[keys[0]]
492
+ if isinstance(query_responses, str):
493
+ query_responses = json.loads(query_responses)
494
+ if isinstance(query_responses, dict):
495
+ if len(query_responses.keys()) == 1:
496
+ return query_responses[list(query_responses.keys())[0]]
497
+ return query_responses # type: ignore
498
+ if isinstance(query_responses, list):
499
+ return query_responses
407
500
  return json.loads(query_responses)
408
501
  except Exception as e:
409
502
  raise RuntimeError("Error generating query responses") from e
@@ -411,47 +504,65 @@ class Simulator:
411
504
  def _load_query_generation_flow(
412
505
  self,
413
506
  *,
414
- query_response_generating_prompty: Union[str, os.PathLike],
507
+ query_response_generating_prompty: Optional[Union[str, os.PathLike]],
415
508
  prompty_model_config: Dict[str, Any],
416
- query_response_generating_prompty_kwargs: Dict[str, Any],
417
- ) -> Flow:
509
+ query_response_generating_prompty_options: Dict[str, Any],
510
+ ) -> "AsyncPrompty":
418
511
  """
419
512
  Loads the flow for generating query responses.
420
513
 
421
514
  :keyword query_response_generating_prompty: Path to the query response generating prompty file.
422
- :paramtype query_response_generating_prompty: Union[str, os.PathLike]
515
+ :paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]]
423
516
  :keyword prompty_model_config: The configuration for the prompty model.
424
517
  :paramtype prompty_model_config: Dict[str, Any]
425
- :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
426
- :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
518
+ :keyword query_response_generating_prompty_options: Additional keyword arguments for the flow.
519
+ :paramtype query_response_generating_prompty_options: Dict[str, Any]
427
520
  :return: The loaded flow for generating query responses.
428
- :rtype: Flow
521
+ :rtype: AsyncPrompty
429
522
  """
430
523
  if not query_response_generating_prompty:
431
524
  package = "azure.ai.evaluation.simulator._prompty"
432
525
  resource_name = "task_query_response.prompty"
433
526
  try:
434
527
  # Access the resource as a file path
528
+ # pylint: disable=deprecated-method
435
529
  with pkg_resources.path(package, resource_name) as prompty_path:
436
- return load_flow(source=str(prompty_path), model=prompty_model_config)
530
+ prompty_model_config = construct_prompty_model_config(
531
+ model_config=prompty_model_config, # type: ignore
532
+ default_api_version="2024-06-01",
533
+ user_agent=self.__user_agent(),
534
+ )
535
+ return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore
437
536
  except FileNotFoundError as e:
438
- raise f"Flow path for {resource_name} does not exist in package {package}." from e
439
- return load_flow(
537
+ msg = f"Flow path for {resource_name} does not exist in package {package}."
538
+ raise EvaluationException(
539
+ message=msg,
540
+ internal_message=msg,
541
+ error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
542
+ blame=ErrorBlame.USER_ERROR,
543
+ ) from e
544
+ prompty_model_config = construct_prompty_model_config(
545
+ model_config=prompty_model_config, # type: ignore
546
+ default_api_version="2024-06-01",
547
+ user_agent=self.__user_agent(),
548
+ )
549
+ return AsyncPrompty.load(
440
550
  source=query_response_generating_prompty,
441
551
  model=prompty_model_config,
442
- **query_response_generating_prompty_kwargs,
443
- )
552
+ **query_response_generating_prompty_options,
553
+ ) # type: ignore
444
554
 
445
555
  async def _create_conversations_from_query_responses(
446
556
  self,
447
557
  *,
448
558
  query_responses: List[Dict[str, str]],
449
559
  max_conversation_turns: int,
450
- tasks: List[Dict],
560
+ tasks: List[str],
451
561
  user_simulator_prompty: Optional[str],
452
- user_simulator_prompty_kwargs: Dict[str, Any],
562
+ user_simulator_prompty_options: Dict[str, Any],
453
563
  target: Callable,
454
564
  api_call_delay_sec: float,
565
+ text: str,
455
566
  ) -> List[JsonLineChatProtocol]:
456
567
  """
457
568
  Creates full conversations from query-response pairs.
@@ -461,15 +572,17 @@ class Simulator:
461
572
  :keyword max_conversation_turns: The maximum number of conversation turns.
462
573
  :paramtype max_conversation_turns: int
463
574
  :keyword tasks: A list of tasks for the simulation.
464
- :paramtype tasks: List[Dict]
575
+ :paramtype tasks: List[str]
465
576
  :keyword user_simulator_prompty: Path to the user simulator prompty file.
466
577
  :paramtype user_simulator_prompty: Optional[str]
467
- :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
468
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
578
+ :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
579
+ :paramtype user_simulator_prompty_options: Dict[str, Any]
469
580
  :keyword target: The target function to call for responses.
470
581
  :paramtype target: Callable
471
582
  :keyword api_call_delay_sec: Delay in seconds between API calls.
472
583
  :paramtype api_call_delay_sec: float
584
+ :keyword text: The initial input text for generating query responses.
585
+ :paramtype text: str
473
586
  :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
474
587
  :rtype: List[JsonLineChatProtocol]
475
588
  """
@@ -486,14 +599,17 @@ class Simulator:
486
599
  for i, query_response_pair in enumerate(query_responses):
487
600
  query = query_response_pair["q"]
488
601
  response = query_response_pair["r"]
489
- task = tasks[i]
602
+ try:
603
+ task = tasks[i]
604
+ except IndexError:
605
+ task = None
490
606
 
491
607
  conversation = await self._complete_conversation(
492
608
  conversation_starter=query,
493
609
  max_conversation_turns=max_conversation_turns,
494
- task=task,
610
+ task=task, # type: ignore
495
611
  user_simulator_prompty=user_simulator_prompty,
496
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
612
+ user_simulator_prompty_options=user_simulator_prompty_options,
497
613
  target=target,
498
614
  api_call_delay_sec=api_call_delay_sec,
499
615
  progress_bar=progress_bar,
@@ -507,6 +623,7 @@ class Simulator:
507
623
  "task": task,
508
624
  "expected_response": response,
509
625
  "query": query,
626
+ "original_text": text,
510
627
  },
511
628
  "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
512
629
  }
@@ -520,13 +637,13 @@ class Simulator:
520
637
  *,
521
638
  conversation_starter: str,
522
639
  max_conversation_turns: int,
523
- task: str,
640
+ task: Optional[str],
524
641
  user_simulator_prompty: Optional[str],
525
- user_simulator_prompty_kwargs: Dict[str, Any],
642
+ user_simulator_prompty_options: Dict[str, Any],
526
643
  target: Callable,
527
644
  api_call_delay_sec: float,
528
645
  progress_bar: tqdm,
529
- ) -> List[Dict[str, str]]:
646
+ ) -> List[Dict[str, Optional[str]]]:
530
647
  """
531
648
  Completes a conversation with the target model based on the conversation starter.
532
649
 
@@ -538,8 +655,8 @@ class Simulator:
538
655
  :paramtype task: str
539
656
  :keyword user_simulator_prompty: Path to the user simulator prompty file.
540
657
  :paramtype user_simulator_prompty: Optional[str]
541
- :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
542
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
658
+ :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
659
+ :paramtype user_simulator_prompty_options: Dict[str, Any]
543
660
  :keyword target: The target function to call for responses.
544
661
  :paramtype target: Callable
545
662
  :keyword api_call_delay_sec: Delay in seconds between API calls.
@@ -547,36 +664,48 @@ class Simulator:
547
664
  :keyword progress_bar: Progress bar for tracking simulation progress.
548
665
  :paramtype progress_bar: tqdm
549
666
  :return: A list representing the conversation history with each turn's content.
550
- :rtype: List[Dict[str, str]]
667
+ :rtype: List[Dict[str, Optional[str]]]
551
668
  """
552
669
  conversation_history = ConversationHistory()
553
- # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
554
- # conversation_history.add_to_history(user_turn)
555
670
 
556
671
  while len(conversation_history) < max_conversation_turns:
557
672
  user_flow = self._load_user_simulation_flow(
558
- user_simulator_prompty=user_simulator_prompty,
559
- prompty_model_config=self._build_prompty_model_config(),
560
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
673
+ user_simulator_prompty=user_simulator_prompty, # type: ignore
674
+ prompty_model_config=self.model_config, # type: ignore
675
+ user_simulator_prompty_options=user_simulator_prompty_options,
561
676
  )
562
- conversation_starter_from_simulated_user = user_flow(
563
- task=task,
564
- conversation_history=[
565
- {
566
- "role": "assistant",
677
+ if len(conversation_history) == 0:
678
+ if task:
679
+ conversation_starter_from_simulated_user = await user_flow(
680
+ task=task,
681
+ conversation_history=[
682
+ {
683
+ "role": "assistant",
684
+ "content": conversation_starter,
685
+ }
686
+ ],
687
+ action="rewrite the assistant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.",
688
+ )
689
+ else:
690
+ conversation_starter_from_simulated_user = {
567
691
  "content": conversation_starter,
568
- "your_task": "Act as the user and translate the content into a user query.",
569
692
  }
570
- ],
571
- )
693
+ else:
694
+ conversation_starter_from_simulated_user = await user_flow(
695
+ task=task,
696
+ conversation_history=conversation_history.to_context_free_list(),
697
+ action="Your goal is to make sure the task is completed by asking the right questions. Do not ask the same questions again.",
698
+ )
572
699
  if isinstance(conversation_starter_from_simulated_user, dict):
573
700
  conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
574
701
  user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
575
702
  conversation_history.add_to_history(user_turn)
576
- assistant_response = await self._get_target_response(
703
+ assistant_response, assistant_context = await self._get_target_response(
577
704
  target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=conversation_history
578
705
  )
579
- assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
706
+ assistant_turn = Turn(
707
+ role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context
708
+ )
580
709
  conversation_history.add_to_history(assistant_turn)
581
710
  progress_bar.update(1)
582
711
 
@@ -585,44 +714,9 @@ class Simulator:
585
714
 
586
715
  return conversation_history.to_list()
587
716
 
588
- async def _build_user_simulation_response(
589
- self,
590
- task: str,
591
- conversation_history: List[Dict[str, Any]],
592
- user_simulator_prompty: Optional[str],
593
- user_simulator_prompty_kwargs: Dict[str, Any],
594
- ) -> str:
595
- """
596
- Builds a response from the user simulator based on the current conversation history.
597
-
598
- :param task: A string representing the task details.
599
- :type task: str
600
- :param conversation_history: The current conversation history as a list of dictionaries.
601
- :type conversation_history: List[Dict[str, Any]]
602
- :param user_simulator_prompty: Path to the user simulator prompty file.
603
- :type user_simulator_prompty: Optional[str]
604
- :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
605
- :type user_simulator_prompty_kwargs: Dict[str, Any]
606
- :return: The generated response content from the user simulator.
607
- :rtype: str
608
- :raises RuntimeError: If an error occurs during response generation.
609
- """
610
- user_flow = self._load_user_simulation_flow(
611
- user_simulator_prompty=user_simulator_prompty,
612
- prompty_model_config=self._build_prompty_model_config(),
613
- user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
614
- )
615
-
616
- try:
617
- response_content = user_flow(task=task, conversation_history=conversation_history)
618
- user_response = self._parse_prompty_response(response=response_content)
619
- return user_response["content"]
620
- except Exception as e:
621
- raise RuntimeError("Error building user simulation response") from e
622
-
623
717
  async def _get_target_response(
624
718
  self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
625
- ) -> str:
719
+ ) -> Tuple[str, Optional[str]]:
626
720
  """
627
721
  Retrieves the response from the target callback based on the current conversation history.
628
722
 
@@ -632,8 +726,8 @@ class Simulator:
632
726
  :paramtype api_call_delay_sec: float
633
727
  :keyword conversation_history: The current conversation history.
634
728
  :paramtype conversation_history: ConversationHistory
635
- :return: The content of the response from the target.
636
- :rtype: str
729
+ :return: The content of the response from the target and an optional context.
730
+ :rtype: str, Optional[str]
637
731
  """
638
732
  response = await target(
639
733
  messages={"messages": conversation_history.to_list()},
@@ -643,4 +737,4 @@ class Simulator:
643
737
  )
644
738
  await asyncio.sleep(api_call_delay_sec)
645
739
  latest_message = response["messages"][-1]
646
- return latest_message["content"]
740
+ return latest_message["content"], latest_message.get("context", "") # type: ignore