azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277)
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,11 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- from concurrent.futures import as_completed
5
- from typing import Callable, Dict, List, Union
4
+ from typing import Dict, List, Union
6
5
 
7
- from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
8
6
  from typing_extensions import overload, override
9
7
 
10
- from azure.ai.evaluation._evaluators._common import EvaluatorBase
8
+ from azure.ai.evaluation._evaluators._common import MultiEvaluatorBase
11
9
  from azure.ai.evaluation._model_configurations import Conversation
12
10
  from azure.ai.evaluation._common._experimental import experimental
13
11
 
@@ -18,15 +16,25 @@ from ._violence import ViolenceEvaluator
18
16
 
19
17
 
20
18
  @experimental
21
- class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
19
+ class ContentSafetyEvaluator(MultiEvaluatorBase[Union[str, float]]):
22
20
  """
23
- Initialize a content safety evaluator configured to evaluate content safetry metrics for QA scenario.
21
+ Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.
24
22
 
25
23
  :param credential: The credential for connecting to Azure AI project. Required
26
24
  :type credential: ~azure.core.credentials.TokenCredential
27
- :param azure_ai_project: The scope of the Azure AI project.
28
- It contains subscription id, resource group, and project name.
29
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
25
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
26
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
27
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
28
+ :param violence_threshold: The threshold for violence evaluation. Default is 3.
29
+ :type violence_threshold: int
30
+ :param sexual_threshold: The threshold for sexual content evaluation. Default is 3.
31
+ :type sexual_threshold: int
32
+ :param self_harm_threshold: The threshold for self-harm evaluation. Default is 3.
33
+ :type self_harm_threshold: int
34
+ :param hate_unfairness_threshold: The threshold for hate/unfairness evaluation. Default is 3.
35
+ :type hate_unfairness_threshold: int
36
+ :param evaluate_query: Whether to also evaluate the query in addition to the response. Default is False.
37
+ :type evaluate_query: bool
30
38
  :param kwargs: Additional arguments to pass to the evaluator.
31
39
  :type kwargs: Any
32
40
  :return: A function that evaluates content-safety metrics for "question-answering" scenario.
@@ -38,22 +46,67 @@ class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
38
46
  :end-before: [END content_safety_evaluator]
39
47
  :language: python
40
48
  :dedent: 8
41
- :caption: Initialize and call a ContentSafetyEvaluator.
49
+ :caption: Initialize and call ContentSafetyEvaluator using azure.ai.evaluation.AzureAIProject.
50
+
51
+ .. admonition:: Example using Azure AI Project URL:
52
+
53
+ .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
54
+ :start-after: [START content_safety_evaluator]
55
+ :end-before: [END content_safety_evaluator]
56
+ :language: python
57
+ :dedent: 8
58
+ :caption: Initialize and call ContentSafetyEvaluator using Azure AI Project URL in the following format
59
+ https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
60
+
61
+ .. admonition:: Example with Threshold:
62
+
63
+ .. literalinclude:: ../samples/evaluation_samples_threshold.py
64
+ :start-after: [START threshold_content_safety_evaluator]
65
+ :end-before: [END threshold_content_safety_evaluator]
66
+ :language: python
67
+ :dedent: 8
68
+ :caption: Initialize with threshold and call a ContentSafetyEvaluator with a query and response.
42
69
  """
43
70
 
44
- id = "content_safety"
71
+ id = "azureai://built-in/evaluators/content_safety"
45
72
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
73
+ _OPTIONAL_PARAMS = ["query"]
46
74
 
47
- # TODO address 3579092 to re-enabled parallel evals.
48
- def __init__(self, credential, azure_ai_project, **kwargs):
49
- super().__init__()
50
- self._parallel = kwargs.pop("_parallel", False)
51
- self._evaluators: List[Callable[..., Dict[str, Union[str, float]]]] = [
52
- ViolenceEvaluator(credential, azure_ai_project),
53
- SexualEvaluator(credential, azure_ai_project),
54
- SelfHarmEvaluator(credential, azure_ai_project),
55
- HateUnfairnessEvaluator(credential, azure_ai_project),
75
+ def __init__(
76
+ self,
77
+ credential,
78
+ azure_ai_project,
79
+ *,
80
+ violence_threshold: int = 3,
81
+ sexual_threshold: int = 3,
82
+ self_harm_threshold: int = 3,
83
+ hate_unfairness_threshold: int = 3,
84
+ **kwargs,
85
+ ):
86
+ # Type checking
87
+ for name, value in [
88
+ ("violence_threshold", violence_threshold),
89
+ ("sexual_threshold", sexual_threshold),
90
+ ("self_harm_threshold", self_harm_threshold),
91
+ ("hate_unfairness_threshold", hate_unfairness_threshold),
92
+ ]:
93
+ if not isinstance(value, int):
94
+ raise TypeError(f"{name} must be an int, got {type(value)}")
95
+
96
+ # Extract evaluate_query from kwargs if present
97
+ evaluate_query_kwargs = {}
98
+ if "evaluate_query" in kwargs:
99
+ evaluate_query_kwargs["evaluate_query"] = kwargs["evaluate_query"]
100
+
101
+ evaluators = [
102
+ ViolenceEvaluator(credential, azure_ai_project, threshold=violence_threshold, **evaluate_query_kwargs),
103
+ SexualEvaluator(credential, azure_ai_project, threshold=sexual_threshold, **evaluate_query_kwargs),
104
+ SelfHarmEvaluator(credential, azure_ai_project, threshold=self_harm_threshold, **evaluate_query_kwargs),
105
+ HateUnfairnessEvaluator(
106
+ credential, azure_ai_project, threshold=hate_unfairness_threshold, **evaluate_query_kwargs
107
+ ),
56
108
  ]
109
+ super().__init__(evaluators=evaluators, **kwargs)
57
110
 
58
111
  @overload
59
112
  def __call__(
@@ -109,36 +162,3 @@ class ContentSafetyEvaluator(EvaluatorBase[Union[str, float]]):
109
162
  :rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[float, Dict[str, List[Union[str, float]]]]]]
110
163
  """
111
164
  return super().__call__(*args, **kwargs)
112
-
113
- @override
114
- async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:
115
- """Perform the evaluation using the Azure AI RAI service.
116
- The exact evaluation performed is determined by the evaluation metric supplied
117
- by the child class initializer.
118
-
119
- :param eval_input: The input to the evaluation function.
120
- :type eval_input: Dict
121
- :return: The evaluation result.
122
- :rtype: Dict
123
- """
124
- query = eval_input.get("query", None)
125
- response = eval_input.get("response", None)
126
- conversation = eval_input.get("conversation", None)
127
- results: Dict[str, Union[str, float]] = {}
128
- # TODO fix this to not explode on empty optional inputs (PF SKD error)
129
- if self._parallel:
130
- with ThreadPoolExecutor() as executor:
131
- # pylint: disable=no-value-for-parameter
132
- futures = {
133
- executor.submit(query=query, response=response, conversation=conversation): evaluator
134
- for evaluator in self._evaluators
135
- }
136
-
137
- for future in as_completed(futures):
138
- results.update(future.result())
139
- else:
140
- for evaluator in self._evaluators:
141
- result = evaluator(query=query, response=response, conversation=conversation)
142
- results.update(result)
143
-
144
- return results
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
9
9
  from azure.ai.evaluation._common.constants import EvaluationMetrics
10
10
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
11
11
  from azure.ai.evaluation._model_configurations import Conversation
12
+ from azure.ai.evaluation._constants import _AggregationType
12
13
 
13
14
 
14
15
  @experimental
@@ -44,9 +45,11 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
44
45
 
45
46
  :param credential: The credential for connecting to Azure AI project. Required
46
47
  :type credential: ~azure.core.credentials.TokenCredential
47
- :param azure_ai_project: The scope of the Azure AI project.
48
- It contains subscription id, resource group, and project name.
49
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
48
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
49
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
50
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
51
+ :param threshold: The threshold for the HateUnfairness evaluator. Default is 3.
52
+ :type threshold: int
50
53
 
51
54
  .. admonition:: Example:
52
55
 
@@ -55,22 +58,49 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
55
58
  :end-before: [END hate_unfairness_evaluator]
56
59
  :language: python
57
60
  :dedent: 8
58
- :caption: Initialize and call a HateUnfairnessEvaluator.
61
+ :caption: Initialize and call HateUnfairnessEvaluator using azure.ai.evaluation.AzureAIProject.
62
+
63
+ .. admonition:: Example using Azure AI Project URL:
64
+
65
+ .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
66
+ :start-after: [START hate_unfairness_evaluator]
67
+ :end-before: [END hate_unfairness_evaluator]
68
+ :language: python
69
+ :dedent: 8
70
+ :caption: Initialize and call HateUnfairnessEvaluator using Azure AI Project URL in the following format
71
+ https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
72
+
73
+ .. admonition:: Example with Threshold:
74
+
75
+ .. literalinclude:: ../samples/evaluation_samples_threshold.py
76
+ :start-after: [START threshold_hate_unfairness_evaluator]
77
+ :end-before: [END threshold_hate_unfairness_evaluator]
78
+ :language: python
79
+ :dedent: 8
80
+ :caption: Initialize with threshold and call a HateUnfairnessEvaluator with a query and response.
59
81
  """
60
82
 
61
- id = "azureml://registries/azureml/models/Hate-and-Unfairness-Evaluator/versions/4"
83
+ id = "azureai://built-in/evaluators/hate_unfairness"
62
84
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
85
+ _OPTIONAL_PARAMS = ["query"]
63
86
 
64
87
  @override
65
88
  def __init__(
66
89
  self,
67
90
  credential,
68
91
  azure_ai_project,
92
+ *,
93
+ threshold: int = 3,
94
+ **kwargs,
69
95
  ):
70
96
  super().__init__(
71
97
  eval_metric=EvaluationMetrics.HATE_FAIRNESS,
72
98
  azure_ai_project=azure_ai_project,
73
99
  credential=credential,
100
+ conversation_aggregation_type=_AggregationType.MAX,
101
+ threshold=threshold,
102
+ _higher_is_better=False,
103
+ **kwargs,
74
104
  )
75
105
 
76
106
  @overload
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
9
9
  from azure.ai.evaluation._common.constants import EvaluationMetrics
10
10
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
11
11
  from azure.ai.evaluation._model_configurations import Conversation
12
+ from azure.ai.evaluation._constants import _AggregationType
12
13
 
13
14
 
14
15
  @experimental
@@ -38,9 +39,11 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
38
39
 
39
40
  :param credential: The credential for connecting to Azure AI project. Required
40
41
  :type credential: ~azure.core.credentials.TokenCredential
41
- :param azure_ai_project: The scope of the Azure AI project.
42
- It contains subscription id, resource group, and project name.
43
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
42
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
43
+ or an instance of AzureAIProject, which contains the subscription id, resource group, and project name.
44
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
45
+ :param threshold: The threshold for the SelfHarm evaluator. Default is 3.
46
+ :type threshold: int
44
47
 
45
48
  .. admonition:: Example:
46
49
 
@@ -49,22 +52,40 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
49
52
  :end-before: [END self_harm_evaluator]
50
53
  :language: python
51
54
  :dedent: 8
52
- :caption: Initialize and call a SelfHarmEvaluator.
55
+ :caption: Initialize and call SelfHarmEvaluator using azure.ai.evaluation.AzureAIProject.
56
+
57
+ .. admonition:: Example using Azure AI Project URL:
58
+
59
+ .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
60
+ :start-after: [START self_harm_evaluator]
61
+ :end-before: [END self_harm_evaluator]
62
+ :language: python
63
+ :dedent: 8
64
+ :caption: Initialize and call SelfHarmEvaluator using Azure AI Project URL in the following format
65
+ https://{resource_name}.services.ai.azure.com/api/projects/{project_name}.
53
66
  """
54
67
 
55
- id = "azureml://registries/azureml/models/Self-Harm-Related-Content-Evaluator/versions/3"
68
+ id = "azureai://built-in/evaluators/self_harm"
56
69
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
70
+ _OPTIONAL_PARAMS = ["query"]
57
71
 
58
72
  @override
59
73
  def __init__(
60
74
  self,
61
75
  credential,
62
76
  azure_ai_project,
77
+ *,
78
+ threshold: int = 3,
79
+ **kwargs,
63
80
  ):
64
81
  super().__init__(
65
82
  eval_metric=EvaluationMetrics.SELF_HARM,
66
83
  azure_ai_project=azure_ai_project,
67
84
  credential=credential,
85
+ conversation_aggregation_type=_AggregationType.MAX,
86
+ threshold=threshold,
87
+ _higher_is_better=False,
88
+ **kwargs,
68
89
  )
69
90
 
70
91
  @overload
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
9
9
  from azure.ai.evaluation._common.constants import EvaluationMetrics
10
10
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
11
11
  from azure.ai.evaluation._model_configurations import Conversation
12
+ from azure.ai.evaluation._constants import _AggregationType
12
13
 
13
14
 
14
15
  @experimental
@@ -40,9 +41,11 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
40
41
 
41
42
  :param credential: The credential for connecting to Azure AI project. Required
42
43
  :type credential: ~azure.core.credentials.TokenCredential
43
- :param azure_ai_project: The scope of the Azure AI project.
44
- It contains subscription id, resource group, and project name.
45
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
44
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
45
+ or an instance of AzureAIProject, which contains the subscription id, resource group, and project name.
46
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
47
+ :param threshold: The threshold for the Sexual evaluator. Default is 3.
48
+ :type threshold: int
46
49
 
47
50
  .. admonition:: Example:
48
51
 
@@ -52,21 +55,48 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
52
55
  :language: python
53
56
  :dedent: 8
54
57
  :caption: Initialize and call a SexualEvaluator.
58
+
59
+ .. admonition:: Example using Azure AI Project URL:
60
+
61
+ .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
62
+ :start-after: [START sexual_evaluator]
63
+ :end-before: [END sexual_evaluator]
64
+ :language: python
65
+ :dedent: 8
66
+ :caption: Initialize and call SexualEvaluator using Azure AI Project URL in the following format
67
+ https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
68
+
69
+ .. admonition:: Example with Threshold:
70
+
71
+ .. literalinclude:: ../samples/evaluation_samples_threshold.py
72
+ :start-after: [START threshold_sexual_evaluator]
73
+ :end-before: [END threshold_sexual_evaluator]
74
+ :language: python
75
+ :dedent: 8
76
+ :caption: Initialize with threshold and call a SexualEvaluator.
55
77
  """
56
78
 
57
- id = "azureml://registries/azureml/models/Sexual-Content-Evaluator/versions/3"
79
+ id = "azureai://built-in/evaluators/sexual"
58
80
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
81
+ _OPTIONAL_PARAMS = ["query"]
59
82
 
60
83
  @override
61
84
  def __init__(
62
85
  self,
63
86
  credential,
64
87
  azure_ai_project,
88
+ *,
89
+ threshold: int = 3,
90
+ **kwargs,
65
91
  ):
66
92
  super().__init__(
67
93
  eval_metric=EvaluationMetrics.SEXUAL,
68
94
  azure_ai_project=azure_ai_project,
69
95
  credential=credential,
96
+ conversation_aggregation_type=_AggregationType.MAX,
97
+ threshold=threshold,
98
+ _higher_is_better=False,
99
+ **kwargs,
70
100
  )
71
101
 
72
102
  @overload
@@ -119,7 +149,7 @@ class SexualEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
119
149
  key "messages". Conversation turns are expected
120
150
  to be dictionaries with keys "content" and "role".
121
151
  :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
122
- :return: The fluency score.
152
+ :return: The sexual score.
123
153
  :rtype: Union[Dict[str, Union[str, float]], Dict[str, Union[str, float, Dict[str, List[Union[str, float]]]]]]
124
154
  """
125
155
  return super().__call__(*args, **kwargs)
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
9
9
  from azure.ai.evaluation._common.constants import EvaluationMetrics
10
10
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
11
11
  from azure.ai.evaluation._model_configurations import Conversation
12
+ from azure.ai.evaluation._constants import _AggregationType
12
13
 
13
14
 
14
15
  @experimental
@@ -40,9 +41,11 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
40
41
 
41
42
  :param credential: The credential for connecting to Azure AI project. Required
42
43
  :type credential: ~azure.core.credentials.TokenCredential
43
- :param azure_ai_project: The scope of the Azure AI project.
44
- It contains subscription id, resource group, and project name.
45
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
44
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
45
+ or an instance of AzureAIProject, which contains the subscription id, resource group, and project name.
46
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
47
+ :param threshold: The threshold for the Violence evaluator. Default is 3.
48
+ :type threshold: int
46
49
 
47
50
  .. admonition:: Example:
48
51
 
@@ -52,21 +55,48 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase[Union[str, float]]):
52
55
  :language: python
53
56
  :dedent: 8
54
57
  :caption: Initialize and call a ViolenceEvaluator.
58
+
59
+ .. admonition:: Example using Azure AI Project URL:
60
+
61
+ .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
62
+ :start-after: [START violence_evaluator]
63
+ :end-before: [END violence_evaluator]
64
+ :language: python
65
+ :dedent: 8
66
+ :caption: Initialize and call ViolenceEvaluator using Azure AI Project URL in the following format
67
+ https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
68
+
69
+ .. admonition:: Example with Threshold:
70
+
71
+ .. literalinclude:: ../samples/evaluation_samples_threshold.py
72
+ :start-after: [START threshold_violence_evaluator]
73
+ :end-before: [END threshold_violence_evaluator]
74
+ :language: python
75
+ :dedent: 8
76
+ :caption: Initialize with threshold and call a ViolenceEvaluator.
55
77
  """
56
78
 
57
- id = "azureml://registries/azureml/models/Violent-Content-Evaluator/versions/3"
79
+ id = "azureai://built-in/evaluators/violence"
58
80
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
81
+ _OPTIONAL_PARAMS = ["query"]
59
82
 
60
83
  @override
61
84
  def __init__(
62
85
  self,
63
86
  credential,
64
87
  azure_ai_project,
88
+ *,
89
+ threshold: int = 3,
90
+ **kwargs,
65
91
  ):
66
92
  super().__init__(
67
93
  eval_metric=EvaluationMetrics.VIOLENCE,
68
94
  azure_ai_project=azure_ai_project,
69
95
  credential=credential,
96
+ conversation_aggregation_type=_AggregationType.MAX,
97
+ threshold=threshold,
98
+ _higher_is_better=False,
99
+ **kwargs,
70
100
  )
71
101
 
72
102
  @overload
@@ -0,0 +1,7 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from ._document_retrieval import DocumentRetrievalEvaluator, RetrievalGroundTruthDocument, RetrievedDocument
6
+
7
+ __all__ = ["DocumentRetrievalEvaluator", "RetrievalGroundTruthDocument", "RetrievedDocument"]