azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299)
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,62 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ import os
5
+ import types
6
+ from typing import Optional, Type
7
+
8
+ from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClient
9
+ from azure.ai.evaluation._evaluate._batch_run import RunSubmitterClient
10
+ from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
11
+ from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
12
+ inject_openai_api as ported_inject_openai_api,
13
+ recover_openai_api as ported_recover_openai_api,
14
+ )
15
+ from azure.ai.evaluation._constants import PF_DISABLE_TRACING
16
+ from azure.ai.evaluation._evaluate._utils import set_event_loop_policy
17
+
18
+
19
+ class TargetRunContext:
20
+ """Context manager for target batch run.
21
+
22
+ :param upload_snapshot: Whether to upload target snapshot.
23
+ :type upload_snapshot: bool
24
+ """
25
+
26
+ def __init__(self, client: BatchClient, upload_snapshot: bool = False) -> None:
27
+ self._client = client
28
+ self._upload_snapshot = upload_snapshot
29
+ self._original_cwd = os.getcwd()
30
+
31
+ def __enter__(self) -> None:
32
+ # Preserve current working directory, as PF may change it without restoring it afterward
33
+ self._original_cwd = os.getcwd()
34
+
35
+ # Address "[WinError 32] The process cannot access the file" error,
36
+ # caused by conflicts when the venv and target function are in the same directory.
37
+ # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
38
+ if not self._upload_snapshot:
39
+ os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
40
+
41
+ os.environ[PF_DISABLE_TRACING] = "true"
42
+
43
+ if isinstance(self._client, RunSubmitterClient):
44
+ ported_inject_openai_api()
45
+ # For addressing the issue of asyncio event loop closed on Windows
46
+ set_event_loop_policy()
47
+
48
+ def __exit__(
49
+ self,
50
+ exc_type: Optional[Type[BaseException]],
51
+ exc_value: Optional[BaseException],
52
+ exc_tb: Optional[types.TracebackType],
53
+ ) -> None:
54
+ os.chdir(self._original_cwd)
55
+
56
+ if not self._upload_snapshot:
57
+ os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
58
+
59
+ os.environ.pop(PF_DISABLE_TRACING, None)
60
+
61
+ if isinstance(self._client, RunSubmitterClient):
62
+ ported_recover_openai_api()
@@ -10,37 +10,24 @@ import posixpath
10
10
  import time
11
11
  import types
12
12
  import uuid
13
- from typing import Any, Dict, Optional, Set, Type
13
+ from typing import Any, Dict, List, Optional, Set, Type
14
14
  from urllib.parse import urlparse
15
15
 
16
- from promptflow._sdk.entities import Run
16
+ from azure.ai.evaluation._legacy._adapters.entities import Run
17
+ from typing_extensions import Self
17
18
 
18
19
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
19
20
  from azure.ai.evaluation._http_utils import get_http_client
20
21
  from azure.ai.evaluation._version import VERSION
21
22
  from azure.core.pipeline.policies import RetryPolicy
22
23
  from azure.core.rest import HttpResponse
24
+ from azure.core.exceptions import HttpResponseError
25
+ from azure.storage.blob import BlobServiceClient
26
+ from azure.ai.evaluation._azure._clients import LiteMLClient
23
27
 
24
28
  LOGGER = logging.getLogger(__name__)
25
29
 
26
30
 
27
- # Handle optional import. The azure libraries are only present if
28
- # promptflow-azure is installed.
29
- try:
30
- from azure.ai.ml.entities._credentials import AccountKeyConfiguration # pylint: disable=ungrouped-imports
31
- from azure.ai.ml.entities._datastore.datastore import Datastore
32
- from azure.storage.blob import BlobServiceClient
33
- except (ModuleNotFoundError, ImportError):
34
- # If the above mentioned modules cannot be imported, we are running
35
- # in local mode and MLClient in the constructor will be None, so
36
- # we will not arrive to Azure-dependent code.
37
-
38
- # We are logging the import failure only if debug logging level is set because:
39
- # - If the project configuration was not provided this import is not needed.
40
- # - If the project configuration was provided, the error will be raised by PFClient.
41
- LOGGER.debug("promptflow.azure is not installed.")
42
-
43
-
44
31
  @dataclasses.dataclass
45
32
  class RunInfo:
46
33
  """
@@ -89,18 +76,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
89
76
  :type group_name: str
90
77
  :param workspace_name: The name of workspace/project used to track run.
91
78
  :type workspace_name: str
92
- :param ml_client: The ml client used for authentication into Azure.
93
- :type ml_client: azure.ai.ml.MLClient
79
+ :param management_client: The trace destination string to parse the AI ML workspace blob store from.
80
+ :type management_client:
81
+ ~azure.ai.evaluation._promptflow.azure._lite_azure_management_client.LiteMLClient
94
82
  :param promptflow_run: The promptflow run used by the
83
+ :type promptflow_run: Optional[promptflow._sdk.entities.Run]
84
+ :param tags: A dictionary of tags to be added to the evaluation run for tracking and organization purposes.
85
+ :type tags: Optional[Dict[str, str]]
95
86
  """
96
87
 
97
88
  _MAX_RETRIES = 5
98
89
  _BACKOFF_FACTOR = 2
99
90
  _TIMEOUT = 5
100
- _SCOPE = "https://management.azure.com/.default"
101
91
 
102
92
  EVALUATION_ARTIFACT = "instance_results.jsonl"
103
- EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"
104
93
 
105
94
  def __init__(
106
95
  self,
@@ -109,20 +98,22 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
109
98
  subscription_id: str,
110
99
  group_name: str,
111
100
  workspace_name: str,
112
- ml_client: "MLClient",
101
+ management_client: LiteMLClient,
113
102
  promptflow_run: Optional[Run] = None,
103
+ tags: Optional[Dict[str, str]] = None,
114
104
  ) -> None:
115
105
  self._tracking_uri: str = tracking_uri
116
106
  self._subscription_id: str = subscription_id
117
107
  self._resource_group_name: str = group_name
118
108
  self._workspace_name: str = workspace_name
119
- self._ml_client: Any = ml_client
109
+ self._management_client: LiteMLClient = management_client
120
110
  self._is_promptflow_run: bool = promptflow_run is not None
121
111
  self._run_name = run_name
122
112
  self._promptflow_run = promptflow_run
113
+ self._tags = tags or {}
123
114
  self._status = RunStatus.NOT_STARTED
124
- self._url_base = None
125
- self.info = None
115
+ self._url_base: Optional[str] = None
116
+ self._info: Optional[RunInfo] = None
126
117
 
127
118
  @property
128
119
  def status(self) -> RunStatus:
@@ -134,6 +125,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
134
125
  """
135
126
  return self._status
136
127
 
128
+ @property
129
+ def info(self) -> RunInfo:
130
+ if self._info is None:
131
+ msg = "Run info is missing"
132
+ raise EvaluationException(
133
+ message=msg,
134
+ internal_message=msg,
135
+ target=ErrorTarget.EVAL_RUN,
136
+ category=ErrorCategory.UNKNOWN,
137
+ blame=ErrorBlame.UNKNOWN,
138
+ )
139
+
140
+ return self._info
141
+
137
142
  def _get_scope(self) -> str:
138
143
  """
139
144
  Return the scope information for the workspace.
@@ -161,28 +166,37 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
161
166
  )
162
167
  self._url_base = None
163
168
  self._status = RunStatus.BROKEN
164
- self.info = RunInfo.generate(self._run_name)
169
+ self._info = RunInfo.generate(self._run_name)
165
170
  else:
166
171
  self._url_base = urlparse(self._tracking_uri).netloc
167
172
  if self._promptflow_run is not None:
168
- self.info = RunInfo(
173
+ self._info = RunInfo(
169
174
  self._promptflow_run.name,
170
- self._promptflow_run._experiment_name, # pylint: disable=protected-access
175
+ self._promptflow_run._experiment_name or "", # pylint: disable=protected-access
171
176
  self._promptflow_run.name,
172
177
  )
173
178
  else:
174
179
  url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/create"
180
+
181
+ # Prepare tags: start with user tags, ensure mlflow.user is set
182
+ run_tags = self._tags.copy()
183
+ if "mlflow.user" not in run_tags:
184
+ run_tags["mlflow.user"] = "azure-ai-evaluation"
185
+
186
+ # Convert tags to MLflow format
187
+ tags_list = [{"key": key, "value": value} for key, value in run_tags.items()]
188
+
175
189
  body = {
176
190
  "experiment_id": "0",
177
191
  "user_id": "azure-ai-evaluation",
178
192
  "start_time": int(time.time() * 1000),
179
- "tags": [{"key": "mlflow.user", "value": "azure-ai-evaluation"}],
193
+ "tags": tags_list,
180
194
  }
181
195
  if self._run_name:
182
196
  body["run_name"] = self._run_name
183
197
  response = self.request_with_retry(url=url, method="POST", json_dict=body)
184
198
  if response.status_code != 200:
185
- self.info = RunInfo.generate(self._run_name)
199
+ self._info = RunInfo.generate(self._run_name)
186
200
  LOGGER.warning(
187
201
  "The run failed to start: %s: %s."
188
202
  "The results will be saved locally, but will not be logged to Azure.",
@@ -192,7 +206,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
192
206
  self._status = RunStatus.BROKEN
193
207
  else:
194
208
  parsed_response = response.json()
195
- self.info = RunInfo(
209
+ self._info = RunInfo(
196
210
  run_id=parsed_response["run"]["info"]["run_id"],
197
211
  experiment_id=parsed_response["run"]["info"]["experiment_id"],
198
212
  run_name=parsed_response["run"]["info"]["run_name"],
@@ -235,7 +249,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
235
249
  LOGGER.warning("Unable to terminate the run.")
236
250
  self._status = RunStatus.TERMINATED
237
251
 
238
- def __enter__(self):
252
+ def __enter__(self) -> Self:
239
253
  """The Context Manager enter call.
240
254
 
241
255
  :return: The instance of the class.
@@ -249,7 +263,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
249
263
  exc_type: Optional[Type[BaseException]],
250
264
  exc_value: Optional[BaseException],
251
265
  exc_tb: Optional[types.TracebackType],
252
- ) -> Optional[bool]:
266
+ ) -> None:
253
267
  """The context manager exit call.
254
268
 
255
269
  :param exc_type: The exception type
@@ -293,12 +307,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
293
307
  """
294
308
  return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"
295
309
 
296
- def _get_token(self):
297
- # We have to use lazy import because promptflow.azure
298
- # is an optional dependency.
299
- from promptflow.azure._utils._token_cache import ArmTokenCache # pylint: disable=import-error,no-name-in-module
300
-
301
- return ArmTokenCache().get_token(self._ml_client._credential) # pylint: disable=protected-access
310
+ def _get_token(self) -> str:
311
+ return self._management_client.get_token().token
302
312
 
303
313
  def request_with_retry(
304
314
  self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None
@@ -396,7 +406,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
396
406
  """
397
407
  if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
398
408
  return
399
- # Check if artifact dirrectory is empty or does not exist.
409
+ # Check if artifact directory is empty or does not exist.
400
410
  if not os.path.isdir(artifact_folder):
401
411
  LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
402
412
  return
@@ -407,8 +417,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
407
417
  LOGGER.warning("The run results file was not found, skipping artifacts upload.")
408
418
  return
409
419
  # First we will list the files and the appropriate remote paths for them.
410
- root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
411
- remote_paths = {"paths": []}
420
+ root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_id)
421
+ remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
412
422
  local_paths = []
413
423
  # Go over the artifact folder and upload all artifacts.
414
424
  for root, _, filenames in os.walk(artifact_folder):
@@ -424,18 +434,38 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
424
434
  local_paths.append(local_file_path)
425
435
 
426
436
  # We will write the artifacts to the workspaceblobstore
427
- datastore = self._ml_client.datastores.get_default(include_secrets=True)
437
+ datastore = self._management_client.workspace_get_default_datastore(
438
+ self._workspace_name, include_credentials=True
439
+ )
428
440
  account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
429
- svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
430
- for local, remote in zip(local_paths, remote_paths["paths"]):
431
- blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
432
- with open(local, "rb") as fp:
433
- blob_client.upload_blob(fp, overwrite=True)
441
+
442
+ svc_client = BlobServiceClient(account_url=account_url, credential=datastore.credential)
443
+ try:
444
+ for local, remote in zip(local_paths, remote_paths["paths"]):
445
+ blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
446
+ with open(local, "rb") as fp:
447
+ blob_client.upload_blob(fp, overwrite=True)
448
+ except HttpResponseError as ex:
449
+ if ex.status_code == 403:
450
+ msg = (
451
+ "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
452
+ " Please ensure that the necessary access rights are granted."
453
+ )
454
+ raise EvaluationException(
455
+ message=msg,
456
+ target=ErrorTarget.EVAL_RUN,
457
+ category=ErrorCategory.FAILED_REMOTE_TRACKING,
458
+ blame=ErrorBlame.USER_ERROR,
459
+ tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
460
+ ) from ex
461
+
462
+ raise ex
434
463
 
435
464
  # To show artifact in UI we will need to register it. If it is a promptflow run,
436
465
  # we are rewriting already registered artifact and need to skip this step.
437
466
  if self._is_promptflow_run:
438
467
  return
468
+
439
469
  url = (
440
470
  f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
441
471
  f"/resourceGroups/{self._resource_group_name}/providers/"
@@ -458,15 +488,28 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
458
488
  if response.status_code != 200:
459
489
  self._log_warning("register artifact", response)
460
490
 
461
- def _get_datastore_credential(self, datastore: "Datastore"):
462
- # Reference the logic in azure.ai.ml._artifact._artifact_utilities
463
- # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103
464
- credential = datastore.credentials
465
- if isinstance(credential, AccountKeyConfiguration):
466
- return credential.account_key
467
- if hasattr(credential, "sas_token"):
468
- return credential.sas_token
469
- return self._ml_client.datastores._credential # pylint: disable=protected-access
491
+ # register artifacts for images if exists in image folder
492
+ try:
493
+ for remote_path in remote_paths["paths"]:
494
+ remote_file_path = remote_path["path"]
495
+ if "images" in os.path.normpath(remote_file_path).split(os.sep):
496
+ response = self.request_with_retry(
497
+ url=url,
498
+ method="POST",
499
+ json_dict={
500
+ "origin": "ExperimentRun",
501
+ "container": f"dcid.{self.info.run_id}",
502
+ "path": posixpath.join("images", os.path.basename(remote_file_path)),
503
+ "dataPath": {
504
+ "dataStoreName": datastore.name,
505
+ "relativePath": remote_file_path,
506
+ },
507
+ },
508
+ )
509
+ if response.status_code != 200:
510
+ self._log_warning("register image artifact", response)
511
+ except Exception as ex: # pylint: disable=broad-exception-caught
512
+ LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
470
513
 
471
514
  def log_metric(self, key: str, value: float) -> None:
472
515
  """