azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show
  1. azure/ai/evaluation/__init__.py +85 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +147 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +87 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
  155. azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
  156. azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
  157. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  158. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  159. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  160. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  161. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  162. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  163. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  165. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
  264. azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,97 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # Pretty much all this code will be removed
6
+
7
+ from typing import Any, Dict, Optional
8
+
9
+
10
def start_trace(
    *,
    resource_attributes: Optional[Dict] = None,
    collection: Optional[str] = None,
    **kwargs: Any,
) -> None:
    """Placeholder for starting a trace; currently does nothing.

    The arguments are accepted so existing call sites keep working, but none
    of them is read by this implementation.

    :param resource_attributes: Resource attributes for the current process (ignored).
    :type resource_attributes: typing.Optional[dict]
    :param collection: Collection name for the current tracing (ignored).
    :type collection: typing.Optional[str]
    :return: Always ``None``.
    :rtype: None
    """
    return None
24
+
25
+ # res_attrs: Dict[str, str] = {"service.name": "promptflow"}
26
+ # if resource_attributes:
27
+ # logging.debug("specified resource attributes: %s", resource_attributes)
28
+ # res_attrs.update(resource_attributes)
29
+
30
+ # # determine collection
31
+ # collection_user_specified = collection is not None
32
+ # if not collection_user_specified:
33
+ # collection = kwargs.get("_collection", _get_collection_from_cwd())
34
+ # logging.debug("collection is not user specified")
35
+ # if is_collection_writeable():
36
+ # # internal parameter for devkit call
37
+ # _collection = kwargs.get("_collection", None)
38
+ # if _collection is not None:
39
+ # logging.debug("received internal parameter _collection: %s, will use this", _collection)
40
+ # collection = _collection
41
+ # else:
42
+ # logging.debug("trying to get from current working directory...")
43
+ # collection = _get_collection_from_cwd()
44
+ # # TODO ralphe: OpenTelemetry dependency. This is a future task to resolve.
45
+ # # else:
46
+ # # logging.debug("collection is protected, will directly use that...")
47
+ # # tracer_provider: TracerProvider = trace.get_tracer_provider()
48
+ # # collection = tracer_provider.resource.attributes["collection"]
49
+ # logging.info("collection: %s", collection)
50
+ # res_attrs["collection"] = collection or "default"
51
+ # logging.info("resource attributes: %s", res_attrs)
52
+
53
+ # # if user specifies collection, we will add a flag on tracer provider to avoid override
54
+ # _set_tracer_provider(res_attrs, protected_collection=collection_user_specified)
55
+
56
+ # Rest of code is removed since we are removing promptflow-devkit dependency
57
+
58
+
59
+ # def is_collection_writeable() -> bool:
60
+ # # TODO ralphe: This has OpenTelemetry dependency. That is a future task to resolve.
61
+ # # return not getattr(trace.get_tracer_provider(), TRACER_PROVIDER_PROTECTED_COLLECTION_ATTR, False)
62
+ # return True
63
+
64
+
65
+ # def _get_collection_from_cwd() -> str:
66
+ # """Try to use cwd folder name as collection name; will fall back to default value if run into exception."""
67
+ # cur_folder_name = ""
68
+ # try:
69
+ # cwd = os.getcwd()
70
+ # cur_folder_name = os.path.basename(cwd)
71
+ # except Exception: # pylint: disable=broad-except
72
+ # # possible exception: PermissionError, FileNotFoundError, OSError, etc.
73
+ # pass
74
+ # collection = cur_folder_name or "default"
75
+ # return collection
76
+
77
+
78
+ # def _set_tracer_provider(res_attrs: Dict[str, str], protected_collection: bool) -> None:
79
+ # # TODO ralphe: OpenTelemetry dependency. This is a future task to resolve.
80
+ # # res = Resource(attributes=res_attrs)
81
+ # # tracer_provider = TracerProvider(resource=res)
82
+
83
+ # # cur_tracer_provider = trace.get_tracer_provider()
84
+ # # if isinstance(cur_tracer_provider, TracerProvider):
85
+ # # logging.info("tracer provider is already set, will merge the resource attributes...")
86
+ # # cur_res = cur_tracer_provider.resource
87
+ # # logging.debug("current resource: %s", cur_res.attributes)
88
+ # # new_res = cur_res.merge(res)
89
+ # # cur_tracer_provider._resource = new_res
90
+ # # logging.info("tracer provider is updated with resource attributes: %s", new_res.attributes)
91
+ # # else:
92
+ # # trace.set_tracer_provider(tracer_provider)
93
+ # # logging.info("tracer provider is set with resource attributes: %s", res.attributes)
94
+
95
+ # # if protected_collection:
96
+ # # logging.info("user specifies collection, will add a flag on tracer provider to avoid override...")
97
+ # # setattr(trace.get_tracer_provider(), TRACER_PROVIDER_PROTECTED_COLLECTION_ATTR, True)
@@ -0,0 +1,97 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import inspect
6
+ import os
7
+ import re
8
+ from typing import Any, Final, Mapping, Sequence, Tuple
9
+
10
+
11
+ DEFAULTS_KEY: Final[str] = "$defaults$"
12
+
13
+
14
def normalize_identifier_name(name: str) -> str:
    """Normalize the identifier name to a valid Python variable name.

    Surrounding whitespace is stripped and every non-word character is
    replaced with an underscore. If the result is empty or starts with a
    digit, an underscore is prepended so the result is never empty and never
    begins with a digit.

    Args:
        name (str): The identifier name to normalize.

    Returns:
        str: The normalized identifier name.
    """
    normalized = re.sub(r"\W", "_", name.strip())
    # Guard the empty case first: indexing normalized[0] on "" would raise IndexError.
    if not normalized or normalized[0].isdigit():
        normalized = f"_{normalized}"
    return normalized
27
+
28
+
29
def get_int_env_var(env_var_name: str, default_value: int = 0) -> int:
    """Read an environment variable and interpret it as an integer.

    Args:
        env_var_name (str): The name of the environment variable.
        default_value (int): Returned when the variable is not set, or when
            its value cannot be parsed as an integer.

    Returns:
        int: The integer value of the environment variable, or the default.
    """
    try:
        # When the variable is unset, os.getenv hands back default_value itself.
        return int(os.getenv(env_var_name, default_value))
    except ValueError:
        return default_value
44
+
45
+
46
def get_value_from_path(path: str, data: Mapping[str, Any]) -> Tuple[bool, Any]:
    """Try to get a value from a mapping based on the specified path. The path is a
    string with dot-separated keys (e.g. data.nested_1.nested_2).

    This will interpret the path prioritizing a depth first search with the shortest
    key possible at each level. If for example you had the following data:
    {
        "foo": {
            "bar": {
                "happy": 12
            }
        },
        "foo.bar": {
            "none": 14,
            "random": { "some": 15 }
        },
        "foo.bar.none": 16
    }
    And you asked for foo.bar.none, the returned value would be 14.

    Args:
        path (str): Dot-separated path to look up. Leading and trailing
            whitespace is stripped before splitting.
        data (Mapping[str, Any]): The (possibly nested) mapping to search.

    Returns:
        Tuple[bool, Any]: ``(True, value)`` when the path resolves, otherwise
        ``(False, None)``. ``(False, None)`` is also returned when either
        argument is ``None``.
    """

    def _get_value(node: Any, parts: Sequence[str]) -> Tuple[bool, Any]:
        # Every path segment has been consumed: the current node is the match.
        if not parts:
            return True, node

        # Only mappings can be descended into; the check does not depend on
        # the loop variable, so do it once up front.
        if not isinstance(node, Mapping):
            return False, None

        # Try the shortest joined key first, then progressively longer ones,
        # backtracking whenever the remainder of the path fails to resolve.
        for i in range(1, len(parts) + 1):
            key = ".".join(parts[:i])
            if key in node:
                found, match = _get_value(node[key], parts[i:])
                if found:
                    return found, match

        return False, None

    if path is None or data is None:
        return False, None

    # str.split always yields at least one element, so no emptiness check is needed.
    return _get_value(data, path.strip().split("."))
87
+
88
+
89
def is_async_callable(obj: Any) -> bool:
    """Check if the object is an async callable. This is true if the object itself is a
    coroutine function, or if its ``__call__`` attribute is a coroutine function.

    :param Any obj: The object to check.
    :return: True if the object is an async callable.
    :rtype: bool
    """
    if inspect.iscoroutinefunction(obj):
        return True
    # Fall back to the object's __call__ (None when the attribute is absent).
    return inspect.iscoroutinefunction(getattr(obj, "__call__", None))
@@ -0,0 +1,131 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import asyncio
6
+ import contextvars
7
+ import dataclasses
8
+ from asyncio import Task
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from typing import Any, AsyncIterator, Callable, Iterator, Mapping, Optional, Sequence, Tuple, cast
11
+
12
+
13
class ThreadPoolExecutorWithContext(ThreadPoolExecutor):
    """Thread pool whose worker threads start with a copy of the creator's context variables."""

    # Original source:
    # promptflow-tracing/promptflow/tracing/_context_utils.py

    def __init__(
        self,
        max_workers: Optional[int] = None,
        thread_name_prefix: str = "",
        initializer: Optional[Callable] = None,
        initargs: Tuple[Any, ...] = (),
    ) -> None:
        """The ThreadPoolExecutorWithContext is an extended thread pool implementation
        which copies the context of the constructing thread into the worker threads,
        so traced functions in worker threads can keep their parent-child relationship
        in the tracing system. The arguments are the same as ThreadPoolExecutor.

        Args:
            max_workers: The maximum number of threads that can be used to
                execute the given calls.
            thread_name_prefix: An optional name prefix to give our threads.
            initializer: A callable used to initialize worker threads.
            initargs: A tuple of arguments to pass to the initializer.
        """
        # Snapshot taken here, in the constructing thread, and replayed by the
        # worker initializer below.
        snapshot = contextvars.copy_context()
        super().__init__(
            max_workers=max_workers,
            thread_name_prefix=thread_name_prefix,
            initializer=self.set_context_then_call,
            initargs=(snapshot, initializer, initargs),
        )

    @staticmethod
    def set_context_then_call(
        context: contextvars.Context,
        initializer: Optional[Callable],
        initargs: Tuple[Any, ...],
    ) -> None:
        """Apply every variable captured in ``context`` to the current thread, then
        invoke the user-supplied ``initializer`` (if any) with ``initargs``."""
        for var in context:
            var.set(context[var])
        if initializer:
            initializer(*initargs)
50
+
51
+
52
def _has_running_loop() -> bool:
    """Check if the current thread has a running event loop."""
    # asyncio.get_running_loop() raises RuntimeError when no event loop is
    # running, so the absence of that error is what signals a running loop.
    #
    # Note that this is the only way to check whether there is a running loop now, see:
    # https://docs.python.org/3/library/asyncio-eventloop.html?highlight=get_running_loop#asyncio.get_running_loop
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return False
    return True
64
+
65
+
66
def async_run_allowing_running_loop(async_func, *args, **kwargs):
    """Run an async function to completion, even when the calling thread already has a running event loop.

    When run in an async environment (e.g., in a notebook), because each thread allows only one event
    loop, using asyncio.run directly leads to a RuntimeError ("asyncio.run() cannot be called from a
    running event loop").

    To address this issue, the event loop is checked first. If the current thread already has a
    running event loop, the coroutine is run via asyncio.run in a new thread and this call blocks
    until it finishes; otherwise, it is run via asyncio.run in the current thread.
    """
    if not _has_running_loop():
        return asyncio.run(async_func(*args, **kwargs))

    # TODO ralphe: The logic here makes absolutely no sense to me. If you already have an
    #              async event loop running, why would you want to start up a new thread,
    #              create a new event loop, and run the async function in a new thread?
    #              The call could instead be scheduled on the existing event loop, e.g.:
    #                  asyncio.get_running_loop().create_task(async_func(*args, **kwargs))
    #              The correct thing to do here is not make these decisions here at all.
    #              Instead, all the BatchEngine code should be async first, with the event
    #              loop being started by the callers of that code. For now, I am keeping
    #              this odd logic as is, and in phase 2 of the migration, this will be
    #              refactored to be more idiomatic asyncio code.
    def _run_in_fresh_loop():
        return asyncio.run(async_func(*args, **kwargs))

    with ThreadPoolExecutorWithContext() as pool:
        return pool.submit(_run_in_fresh_loop).result()
93
+
94
+
95
async def stringify_output_async(output: Any) -> str:
    """Recursively flatten ``output`` into a single string.

    Handling by type, checked in this order:

    * ``str``: returned as-is.
    * async iterators and iterators: consumed into a list, then handled as a sequence.
    * mappings: rendered as ``key:value`` pairs joined with ``", "``.
    * other sequences: elements are stringified and concatenated with no separator.
    * ``asyncio.Task``: awaited, and its result stringified.
    * anything else: ``str(output)``.

    :param Any output: The value to stringify.
    :return: The string form of ``output``.
    :rtype: str
    """
    # A str is itself a Sequence whose elements are str, so it must be handled
    # before the Sequence branch; otherwise any non-empty string recurses forever.
    if isinstance(output, str):
        return str(output)
    if isinstance(output, AsyncIterator):
        return await stringify_output_async([v async for v in output])
    if isinstance(output, Iterator):
        return await stringify_output_async(list(output))
    if isinstance(output, Mapping):
        return ", ".join(
            [f"{await stringify_output_async(k)}:{await stringify_output_async(v)}" for k, v in output.items()]
        )
    if isinstance(output, Sequence):
        return "".join([await stringify_output_async(v) for v in output])
    if isinstance(output, Task):
        return await stringify_output_async(await output)

    return str(output)
110
+
111
+
112
def convert_eager_flow_output_to_dict(value: Any) -> Mapping[str, Any]:
    """
    Convert the output of eager flow to a dict. Since the output of eager flow
    may not be a dict, we need to convert it to a dict in batch mode.

    Examples:
    1. If the output is a dict, return it directly:
        value = {"output": 1} -> {"output": 1}
    2. If the output is a dataclass instance, convert it to a dict:
        value = SampleDataClass(output=1) -> {"output": 1}
    3. If the output is anything else (including a dataclass *type*), wrap it
       in a dict under the key "output":
        value = 1 -> {"output": 1}

    :param Any value: The eager flow output.
    :return: A mapping view of the output.
    :rtype: Mapping[str, Any]
    """
    if isinstance(value, Mapping):
        return value
    # is_dataclass() is also true for dataclass types, but asdict() only
    # accepts instances and raises TypeError on a class, so exclude types.
    if dataclasses.is_dataclass(value) and not isinstance(value, type):
        return dataclasses.asdict(cast(Any, value))
    return {"output": value}
@@ -0,0 +1,3 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
@@ -0,0 +1,117 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import os
6
+ from typing import Any, AsyncContextManager, Optional
7
+
8
+ from azure.core.credentials import AccessToken, TokenCredential
9
+ from azure.identity import AzureCliCredential, DefaultAzureCredential, ManagedIdentityCredential
10
+
11
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
12
+ from azure.ai.evaluation._azure._envs import AzureEnvironmentClient
13
+
14
+
15
class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
    """Asynchronous token provider for Azure services that supports non-default Azure clouds
    (e.g. Azure China, Azure US Government, etc.)."""

    def __init__(self, *, base_url: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize the AsyncAzureTokenProvider.

        :param base_url: Forwarded to :class:`AzureEnvironmentClient` as ``base_url``.
        :param kwargs: Additional keyword arguments forwarded to :class:`AzureEnvironmentClient`.
        """
        # The credential is resolved lazily (first ``get_token`` call) or on ``__aenter__``.
        self._credential: Optional[TokenCredential] = None
        self._env_client: Optional[AzureEnvironmentClient] = AzureEnvironmentClient(base_url=base_url, **kwargs)

    async def close(self) -> None:
        """Close the environment client and drop the cached credential.

        Safe to call more than once. After closing, ``get_token`` raises because the
        environment client is no longer available.
        """
        if self._env_client:
            await self._env_client.close()
            self._env_client = None

        self._credential = None

    async def get_token(
        self,
        *scopes: str,
        claims: Optional[str] = None,
        tenant_id: Optional[str] = None,
        enable_cae: bool = False,
        **kwargs: Any,
    ) -> AccessToken:
        """Request an access token for ``scopes`` from the selected credential.

        The credential is initialized on first use if it has not been set yet.

        :param scopes: The scopes to request the token for.
        :param claims: Forwarded to the credential's ``get_token``.
        :param tenant_id: Forwarded to the credential's ``get_token``.
        :param enable_cae: Forwarded to the credential's ``get_token``.
        :param kwargs: Additional keyword arguments forwarded to the credential's ``get_token``.
        :return: The access token returned by the credential.
        :raises EvaluationException: If the provider has been closed or no credential
            could be determined.
        """
        if self._credential is None:
            self._credential = await self._initialize_async(self._env_client)

        # Defensive guard: keeps the error explicit should initialization ever yield nothing.
        if self._credential is None:
            raise EvaluationException(
                f"{self.__class__.__name__} could not determine the credential to use.",
                target=ErrorTarget.UNKNOWN,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.SYSTEM_ERROR,
            )

        # NOTE: the credential's ``get_token`` is called directly and is not awaited.
        return self._credential.get_token(*scopes, claims=claims, tenant_id=tenant_id, enable_cae=enable_cae, **kwargs)

    async def __aenter__(self) -> "AsyncAzureTokenProvider":
        """Resolve the credential eagerly and return this provider."""
        self._credential = await self._initialize_async(self._env_client)
        return self

    async def __aexit__(
        self,
        exc_type: Optional[type] = None,
        exc_value: Optional[BaseException] = None,
        traceback: Optional[Any] = None,
    ) -> None:
        """Close the provider when leaving the ``async with`` block."""
        await self.close()

    @staticmethod
    async def _initialize_async(client: Optional[AzureEnvironmentClient]) -> TokenCredential:
        """Select the credential to use for the current environment.

        Selection order:

        1. Non-default Azure cloud: ``DefaultAzureCredential`` bound to that cloud's
           ``active_directory_endpoint`` authority.
        2. ``AZUREML_OBO_ENABLED`` set: ``AzureMLOnBehalfOfCredential`` (requires ``azure-ai-ml``).
        3. ``PF_USE_AZURE_CLI_CREDENTIAL`` equal to ``"true"``: ``AzureCliCredential``.
        4. ``IS_IN_CI_PIPELINE`` equal to ``"true"``: ``AzureCliCredential``.
        5. ``DEFAULT_IDENTITY_CLIENT_ID`` set: ``ManagedIdentityCredential`` for that client id.
        6. Otherwise: ``DefaultAzureCredential``.

        :param client: The environment client used to look up cloud metadata, or ``None``
            if the provider has already been closed.
        :return: The selected credential.
        :raises EvaluationException: If ``client`` is ``None``, the cloud metadata cannot be
            retrieved, or the OBO credential package is not installed.
        """
        # Determine which credential to use based on the configured Azure cloud environment variables
        # and possibly making network calls to Azure to get the correct Azure cloud metadata.
        if client is None:
            raise EvaluationException(
                f"{AsyncAzureTokenProvider.__name__} instance has already been closed.",
                target=ErrorTarget.UNKNOWN,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            )

        cloud_name: str = await client.get_default_cloud_name_async()
        if cloud_name != client.DEFAULT_AZURE_CLOUD_NAME:
            # If the cloud name is not the default, we need the metadata for the specified cloud
            # so the credential can be pointed at that cloud's authority.
            metadata = await client.get_cloud_async(cloud_name)
            if metadata is None:
                raise EvaluationException(
                    f"Failed to get metadata for cloud '{cloud_name}'.",
                    target=ErrorTarget.UNKNOWN,
                    category=ErrorCategory.INVALID_VALUE,
                    blame=ErrorBlame.USER_ERROR,
                )

            authority = metadata.get("active_directory_endpoint")
            return DefaultAzureCredential(authority=authority, exclude_shared_token_cache_credential=True)
        elif os.getenv("AZUREML_OBO_ENABLED"):
            # using Azure on behalf of credentials requires the use of the azure-ai-ml package
            try:
                from azure.ai.ml.identity import AzureMLOnBehalfOfCredential

                return AzureMLOnBehalfOfCredential()  # type: ignore
            except ImportError as ex:  # ModuleNotFoundError is a subclass of ImportError
                # Chain the original import failure so the root cause stays visible.
                raise EvaluationException(
                    message=(
                        "The required packages for OBO credentials are missing.\n"
                        'To resolve this, please install them by running "pip install azure-ai-ml".'
                    ),
                    target=ErrorTarget.EVALUATE,
                    category=ErrorCategory.MISSING_PACKAGE,
                    blame=ErrorBlame.USER_ERROR,
                ) from ex
        elif os.environ.get("PF_USE_AZURE_CLI_CREDENTIAL", "false").lower() == "true":
            # TODO ralphe: Is this still needed? DefaultAzureCredential already includes CLI credentials
            #              albeit with a lower priority
            return AzureCliCredential()
        elif os.environ.get("IS_IN_CI_PIPELINE", "false").lower() == "true":
            # CI pipeline runs use the Azure CLI credential as well (not a managed identity).
            return AzureCliCredential()
        elif identity_client_id := os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"):
            return ManagedIdentityCredential(client_id=identity_client_id)
        else:
            return DefaultAzureCredential()