azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277)
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -12,27 +12,27 @@ from ._evaluators._content_safety import (
12
12
  SexualEvaluator,
13
13
  ViolenceEvaluator,
14
14
  )
15
- from ._evaluators._multimodal._content_safety_multimodal import (
16
- ContentSafetyMultimodalEvaluator,
17
- HateUnfairnessMultimodalEvaluator,
18
- SelfHarmMultimodalEvaluator,
19
- SexualMultimodalEvaluator,
20
- ViolenceMultimodalEvaluator,
21
- )
22
- from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
23
15
  from ._evaluators._f1_score import F1ScoreEvaluator
24
16
  from ._evaluators._fluency import FluencyEvaluator
25
17
  from ._evaluators._gleu import GleuScoreEvaluator
26
18
  from ._evaluators._groundedness import GroundednessEvaluator
27
19
  from ._evaluators._service_groundedness import GroundednessProEvaluator
20
+ from ._evaluators._intent_resolution import IntentResolutionEvaluator
28
21
  from ._evaluators._meteor import MeteorScoreEvaluator
29
22
  from ._evaluators._protected_material import ProtectedMaterialEvaluator
30
23
  from ._evaluators._qa import QAEvaluator
24
+ from ._evaluators._response_completeness import ResponseCompletenessEvaluator
25
+ from ._evaluators._task_adherence import TaskAdherenceEvaluator
31
26
  from ._evaluators._relevance import RelevanceEvaluator
32
27
  from ._evaluators._retrieval import RetrievalEvaluator
33
28
  from ._evaluators._rouge import RougeScoreEvaluator, RougeType
34
29
  from ._evaluators._similarity import SimilarityEvaluator
35
30
  from ._evaluators._xpia import IndirectAttackEvaluator
31
+ from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
32
+ from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
33
+ from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
34
+ from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
35
+ from ._evaluators._tool_output_utilization import _ToolOutputUtilizationEvaluator
36
36
  from ._model_configurations import (
37
37
  AzureAIProject,
38
38
  AzureOpenAIModelConfiguration,
@@ -42,6 +42,59 @@ from ._model_configurations import (
42
42
  Message,
43
43
  OpenAIModelConfiguration,
44
44
  )
45
+ from ._aoai.aoai_grader import AzureOpenAIGrader
46
+ from ._aoai.label_grader import AzureOpenAILabelGrader
47
+ from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
48
+ from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
49
+ from ._aoai.score_model_grader import AzureOpenAIScoreModelGrader
50
+ from ._aoai.python_grader import AzureOpenAIPythonGrader
51
+
52
+
53
+ _patch_all = []
54
+
55
+ # The converter from the AI service to the evaluator schema requires a dependency on
56
+ # ai.projects, but we also don't want to force users installing ai.evaluations to pull
57
+ # in ai.projects. So we only import it if it's available and the user has ai.projects.
58
+ # We use lazy loading to avoid printing messages during import unless the classes are actually used.
59
+ _lazy_imports = {}
60
+
61
+
62
+ def _create_lazy_import(class_name, module_path, dependency_name):
63
+ """Create a lazy import function for optional dependencies.
64
+
65
+ Args:
66
+ class_name: Name of the class to import
67
+ module_path: Module path to import from
68
+ dependency_name: Name of the dependency package for error message
69
+
70
+ Returns:
71
+ A function that performs the lazy import when called
72
+ """
73
+
74
+ def lazy_import():
75
+ try:
76
+ module = __import__(module_path, fromlist=[class_name])
77
+ cls = getattr(module, class_name)
78
+ _patch_all.append(class_name)
79
+ return cls
80
+ except ImportError:
81
+ raise ImportError(
82
+ f"Could not import {class_name}. Please install the dependency with `pip install {dependency_name}`."
83
+ )
84
+
85
+ return lazy_import
86
+
87
+
88
+ _lazy_imports["AIAgentConverter"] = _create_lazy_import(
89
+ "AIAgentConverter",
90
+ "azure.ai.evaluation._converters._ai_services",
91
+ "azure-ai-projects",
92
+ )
93
+ _lazy_imports["SKAgentConverter"] = _create_lazy_import(
94
+ "SKAgentConverter",
95
+ "azure.ai.evaluation._converters._sk_services",
96
+ "semantic-kernel",
97
+ )
45
98
 
46
99
  __all__ = [
47
100
  "evaluate",
@@ -50,6 +103,9 @@ __all__ = [
50
103
  "FluencyEvaluator",
51
104
  "GroundednessEvaluator",
52
105
  "GroundednessProEvaluator",
106
+ "ResponseCompletenessEvaluator",
107
+ "TaskAdherenceEvaluator",
108
+ "IntentResolutionEvaluator",
53
109
  "RelevanceEvaluator",
54
110
  "SimilarityEvaluator",
55
111
  "QAEvaluator",
@@ -73,10 +129,23 @@ __all__ = [
73
129
  "Conversation",
74
130
  "Message",
75
131
  "EvaluationResult",
76
- "ContentSafetyMultimodalEvaluator",
77
- "HateUnfairnessMultimodalEvaluator",
78
- "SelfHarmMultimodalEvaluator",
79
- "SexualMultimodalEvaluator",
80
- "ViolenceMultimodalEvaluator",
81
- "ProtectedMaterialMultimodalEvaluator",
132
+ "CodeVulnerabilityEvaluator",
133
+ "UngroundedAttributesEvaluator",
134
+ "ToolCallAccuracyEvaluator",
135
+ "_ToolOutputUtilizationEvaluator",
136
+ "AzureOpenAIGrader",
137
+ "AzureOpenAILabelGrader",
138
+ "AzureOpenAIStringCheckGrader",
139
+ "AzureOpenAITextSimilarityGrader",
140
+ "AzureOpenAIScoreModelGrader",
141
+ "AzureOpenAIPythonGrader",
82
142
  ]
143
+
144
+ __all__.extend([p for p in _patch_all if p not in __all__])
145
+
146
+
147
+ def __getattr__(name):
148
+ """Handle lazy imports for optional dependencies."""
149
+ if name in _lazy_imports:
150
+ return _lazy_imports[name]()
151
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
@@ -0,0 +1,10 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+
6
+ from .aoai_grader import AzureOpenAIGrader
7
+
8
+ __all__ = [
9
+ "AzureOpenAIGrader",
10
+ ]
@@ -0,0 +1,140 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import TYPE_CHECKING, Any, Dict, Optional, Union
5
+
6
+ from typing_extensions import TypeIs
7
+
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION, TokenScope
10
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
11
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
12
+ from azure.ai.evaluation._user_agent import UserAgentSingleton
13
+ from azure.core.credentials import TokenCredential
14
+
15
+ if TYPE_CHECKING:
16
+ from openai.lib.azure import AzureADTokenProvider
17
+
18
+
19
+ @experimental
20
+ class AzureOpenAIGrader:
21
+ """Base class for Azure OpenAI grader wrappers.
22
+
23
+ Recommended only for use by experienced OpenAI API users.
24
+ Combines a model configuration and any grader configuration
25
+ into a singular object that can be used in evaluations.
26
+
27
+ Supplying an AzureOpenAIGrader to the `evaluate` method will cause an asynchronous request to evaluate
28
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
29
+ evaluation results.
30
+
31
+ :param model_config: The model configuration to use for the grader.
32
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
33
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
34
+ :param grader_config: The grader configuration to use for the grader. This is expected
35
+ to be formatted as a dictionary that matches the specifications of the sub-types of
36
+ the TestingCriterion alias specified in `OpenAI's SDK <https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L151>`_.
37
+ :type grader_config: Dict[str, Any]
38
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
39
+ :type credential: ~azure.core.credentials.TokenCredential
40
+ :param kwargs: Additional keyword arguments to pass to the grader.
41
+ :type kwargs: Any
42
+ """
43
+
44
+ id = "azureai://built-in/evaluators/azure-openai/custom_grader"
45
+
46
+ def __init__(
47
+ self,
48
+ *,
49
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
50
+ grader_config: Dict[str, Any],
51
+ credential: Optional[TokenCredential] = None,
52
+ **kwargs: Any,
53
+ ):
54
+ self._model_config = model_config
55
+ self._grader_config = grader_config
56
+ self._credential = credential
57
+
58
+ if kwargs.get("validate", True):
59
+ self._validate_model_config()
60
+ self._validate_grader_config()
61
+
62
+ def _validate_model_config(self) -> None:
63
+ """Validate the model configuration that this grader wrapper is using."""
64
+ msg = None
65
+ if self._is_azure_model_config(self._model_config):
66
+ if not any(auth for auth in (self._model_config.get("api_key"), self._credential)):
67
+ msg = (
68
+ f"{type(self).__name__}: Requires an api_key in the supplied model_config, "
69
+ + "or providing a credential to the grader's __init__ method. "
70
+ )
71
+
72
+ else:
73
+ if "api_key" not in self._model_config or not self._model_config.get("api_key"):
74
+ msg = f"{type(self).__name__}: Requires an api_key in the supplied model_config."
75
+
76
+ if msg is None:
77
+ return
78
+
79
+ raise EvaluationException(
80
+ message=msg,
81
+ blame=ErrorBlame.USER_ERROR,
82
+ category=ErrorCategory.INVALID_VALUE,
83
+ target=ErrorTarget.AOAI_GRADER,
84
+ )
85
+
86
+ def _validate_grader_config(self) -> None:
87
+ """Validate the grader configuration that this grader wrapper is using."""
88
+
89
+ return
90
+
91
+ @staticmethod
92
+ def _is_azure_model_config(
93
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
94
+ ) -> TypeIs[AzureOpenAIModelConfiguration]:
95
+ return "azure_endpoint" in model_config
96
+
97
+ def get_client(self) -> Any:
98
+ """Construct an appropriate OpenAI client using this grader's model configuration.
99
+ Returns a slightly different client depending on whether or not this grader's model
100
+ configuration is for Azure OpenAI or OpenAI.
101
+
102
+ :return: The OpenAI client.
103
+ :rtype: [~openai.OpenAI, ~openai.AzureOpenAI]
104
+ """
105
+ default_headers = {"User-Agent": UserAgentSingleton().value}
106
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration] = self._model_config
107
+ api_key: Optional[str] = model_config.get("api_key")
108
+
109
+ if self._is_azure_model_config(model_config):
110
+ from openai import AzureOpenAI
111
+
112
+ # TODO set default values?
113
+ return AzureOpenAI(
114
+ azure_endpoint=model_config["azure_endpoint"],
115
+ api_key=api_key, # Default-style access to appease linters.
116
+ api_version=DEFAULT_AOAI_API_VERSION, # Force a known working version
117
+ azure_deployment=model_config.get("azure_deployment", ""),
118
+ azure_ad_token_provider=self._get_token_provider(self._credential) if not api_key else None,
119
+ default_headers=default_headers,
120
+ )
121
+ from openai import OpenAI
122
+
123
+ # TODO add default values for base_url and organization?
124
+ return OpenAI(
125
+ api_key=api_key,
126
+ base_url=model_config.get("base_url", ""),
127
+ organization=model_config.get("organization", ""),
128
+ default_headers=default_headers,
129
+ )
130
+
131
+ @staticmethod
132
+ def _get_token_provider(cred: TokenCredential) -> "AzureADTokenProvider":
133
+ """Get the token provider the AzureOpenAI client.
134
+
135
+ :param TokenCredential cred: The Azure authentication credential.
136
+ :return: The token provider if a credential is provided, otherwise None.
137
+ :rtype: openai.lib.azure.AzureADTokenProvider
138
+ """
139
+
140
+ return lambda: cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT).token
@@ -0,0 +1,68 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, List, Optional, Union
5
+
6
+ from openai.types.graders import LabelModelGrader
7
+
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
10
+ from azure.core.credentials import TokenCredential
11
+
12
+ from .aoai_grader import AzureOpenAIGrader
13
+
14
+
15
+ @experimental
16
+ class AzureOpenAILabelGrader(AzureOpenAIGrader):
17
+ """Wrapper class for OpenAI's label model graders.
18
+
19
+ Supplying a LabelGrader to the `evaluate` method will cause an asynchronous request to evaluate
20
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
21
+ evaluation results.
22
+
23
+ :param model_config: The model configuration to use for the grader.
24
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
25
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
26
+ :param input: The list of label-based testing criteria for this grader. Individual
27
+ values of this list are expected to be dictionaries that match the format of any of the valid
28
+ `TestingCriterionLabelModelInput <https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L125C1-L125C32>`_
29
+ subtypes.
30
+ :type input: List[Dict[str, str]]
31
+ :param labels: A list of strings representing the classification labels of this grader.
32
+ :type labels: List[str]
33
+ :param model: The model to use for the evaluation. Must support structured outputs.
34
+ :type model: str
35
+ :param name: The name of the grader.
36
+ :type name: str
37
+ :param passing_labels: The labels that indicate a passing result. Must be a subset of labels.
38
+ :type passing_labels: List[str]
39
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
40
+ :type credential: ~azure.core.credentials.TokenCredential
41
+ :param kwargs: Additional keyword arguments to pass to the grader.
42
+ :type kwargs: Any
43
+ """
44
+
45
+ id = "azureai://built-in/evaluators/azure-openai/label_grader"
46
+ _type = "label_model"
47
+
48
+ def __init__(
49
+ self,
50
+ *,
51
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
52
+ input: List[Dict[str, str]],
53
+ labels: List[str],
54
+ model: str,
55
+ name: str,
56
+ passing_labels: List[str],
57
+ credential: Optional[TokenCredential] = None,
58
+ **kwargs: Any
59
+ ):
60
+ grader = LabelModelGrader(
61
+ input=input,
62
+ labels=labels,
63
+ model=model,
64
+ name=name,
65
+ passing_labels=passing_labels,
66
+ type=AzureOpenAILabelGrader._type,
67
+ )
68
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -0,0 +1,86 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Optional, Union
5
+
6
+ from openai.types.graders import PythonGrader
7
+
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
10
+ from azure.core.credentials import TokenCredential
11
+
12
+ from .aoai_grader import AzureOpenAIGrader
13
+
14
+
15
+ @experimental
16
+ class AzureOpenAIPythonGrader(AzureOpenAIGrader):
17
+ """Wrapper class for OpenAI's Python code graders.
18
+
19
+ Enables custom Python-based evaluation logic with flexible scoring and
20
+ pass/fail thresholds. The grader executes user-provided Python code
21
+ to evaluate outputs against custom criteria.
22
+
23
+ Supplying a PythonGrader to the `evaluate` method will cause an
24
+ asynchronous request to evaluate the grader via the OpenAI API. The
25
+ results of the evaluation will then be merged into the standard
26
+ evaluation results.
27
+
28
+ :param model_config: The model configuration to use for the grader.
29
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
31
+ :param name: The name of the grader.
32
+ :type name: str
33
+ :param image_tag: The image tag for the Python execution environment.
34
+ :type image_tag: str
35
+ :param pass_threshold: Score threshold for pass/fail classification. Scores >= threshold are considered passing.
36
+ :type pass_threshold: float
37
+ :param source: Python source code containing the grade function.
38
+ Must define: def grade(sample: dict, item: dict) -> float
39
+ :type source: str
40
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
41
+ :type credential: ~azure.core.credentials.TokenCredential
42
+ :param kwargs: Additional keyword arguments to pass to the grader.
43
+ :type kwargs: Any
44
+
45
+
46
+ .. admonition:: Example:
47
+
48
+ .. literalinclude:: ../samples/evaluation_samples_common.py
49
+ :start-after: [START python_grader_example]
50
+ :end-before: [END python_grader_example]
51
+ :language: python
52
+ :dedent: 8
53
+ :caption: Using AzureOpenAIPythonGrader for custom evaluation logic.
54
+ """
55
+
56
+ id = "azureai://built-in/evaluators/azure-openai/python_grader"
57
+ _type = "python"
58
+
59
+ def __init__(
60
+ self,
61
+ *,
62
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
63
+ name: str,
64
+ pass_threshold: float,
65
+ source: str,
66
+ image_tag: Optional[str] = None,
67
+ credential: Optional[TokenCredential] = None,
68
+ **kwargs: Any,
69
+ ):
70
+ # Validate pass_threshold
71
+ if not 0.0 <= pass_threshold <= 1.0:
72
+ raise ValueError("pass_threshold must be between 0.0 and 1.0")
73
+
74
+ # Store pass_threshold as instance attribute for potential future use
75
+ self.pass_threshold = pass_threshold
76
+
77
+ # Create OpenAI PythonGrader instance
78
+ grader = PythonGrader(
79
+ name=name,
80
+ image_tag=image_tag,
81
+ pass_threshold=pass_threshold,
82
+ source=source,
83
+ type=AzureOpenAIPythonGrader._type,
84
+ )
85
+
86
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -0,0 +1,94 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, List, Optional, Union
5
+
6
+ from openai.types.graders import ScoreModelGrader
7
+
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
10
+ from azure.core.credentials import TokenCredential
11
+
12
+ from .aoai_grader import AzureOpenAIGrader
13
+
14
+
15
+ @experimental
16
+ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
17
+ """Wrapper class for OpenAI's score model graders.
18
+
19
+ Enables continuous scoring evaluation with custom prompts and flexible
20
+ conversation-style inputs. Supports configurable score ranges and
21
+ pass thresholds for binary classification.
22
+
23
+ Supplying a ScoreModelGrader to the `evaluate` method will cause an
24
+ asynchronous request to evaluate the grader via the OpenAI API. The
25
+ results of the evaluation will then be merged into the standard
26
+ evaluation results.
27
+
28
+ :param model_config: The model configuration to use for the grader.
29
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
31
+ :param input: The input messages for the grader. List of conversation
32
+ messages with role and content.
33
+ :type input: List[Dict[str, str]]
34
+ :param model: The model to use for the evaluation.
35
+ :type model: str
36
+ :param name: The name of the grader.
37
+ :type name: str
38
+ :param range: The range of the score. Defaults to [0, 1].
39
+ :type range: Optional[List[float]]
40
+ :param pass_threshold: Score threshold for pass/fail classification.
41
+ Defaults to midpoint of range.
42
+ :type pass_threshold: Optional[float]
43
+ :param sampling_params: The sampling parameters for the model.
44
+ :type sampling_params: Optional[Dict[str, Any]]
45
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
46
+ :type credential: ~azure.core.credentials.TokenCredential
47
+ :param kwargs: Additional keyword arguments to pass to the grader.
48
+ :type kwargs: Any
49
+ """
50
+
51
+ id = "azureai://built-in/evaluators/azure-openai/score_model_grader"
52
+ _type = "score_model"
53
+
54
+ def __init__(
55
+ self,
56
+ *,
57
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
58
+ input: List[Dict[str, str]],
59
+ model: str,
60
+ name: str,
61
+ range: Optional[List[float]] = None,
62
+ pass_threshold: Optional[float] = None,
63
+ sampling_params: Optional[Dict[str, Any]] = None,
64
+ credential: Optional[TokenCredential] = None,
65
+ **kwargs: Any,
66
+ ):
67
+ # Validate range and pass_threshold
68
+ if range is not None:
69
+ if len(range) != 2 or range[0] >= range[1]:
70
+ raise ValueError("range must be a list of two numbers [min, max] where min < max")
71
+ else:
72
+ range = [0.0, 1.0] # Default range
73
+
74
+ if pass_threshold is not None:
75
+ if range and (pass_threshold < range[0] or pass_threshold > range[1]):
76
+ raise ValueError(f"pass_threshold {pass_threshold} must be within range {range}")
77
+ else:
78
+ pass_threshold = (range[0] + range[1]) / 2 # Default to midpoint
79
+
80
+ # Store pass_threshold as instance attribute
81
+ self.pass_threshold = pass_threshold
82
+
83
+ # Create OpenAI ScoreModelGrader instance
84
+ grader_kwargs = {"input": input, "model": model, "name": name, "type": AzureOpenAIScoreModelGrader._type}
85
+
86
+ if range is not None:
87
+ grader_kwargs["range"] = range
88
+ if sampling_params is not None:
89
+ grader_kwargs["sampling_params"] = sampling_params
90
+ grader_kwargs["pass_threshold"] = self.pass_threshold
91
+
92
+ grader = ScoreModelGrader(**grader_kwargs)
93
+
94
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -0,0 +1,66 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Optional, Union
5
+
6
+ from openai.types.graders import StringCheckGrader
7
+ from typing_extensions import Literal
8
+
9
+ from azure.ai.evaluation._common._experimental import experimental
10
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
11
+ from azure.core.credentials import TokenCredential
12
+
13
+ from .aoai_grader import AzureOpenAIGrader
14
+
15
+
16
+ @experimental
17
+ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
18
+ """Wrapper class for OpenAI's string check graders.
19
+
20
+ Supplying a StringCheckGrader to the `evaluate` method will cause an asynchronous request to evaluate
21
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
22
+ evaluation results.
23
+
24
+ :param model_config: The model configuration to use for the grader.
25
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,~azure.ai.evaluation.OpenAIModelConfiguration]
26
+ :param input: The input text. This may include template strings.
27
+ :type input: str
28
+ :param name: The name of the grader.
29
+ :type name: str
30
+ :param operation: The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
31
+ :type operation: Literal["eq", "ne", "like", "ilike"]
32
+ :param reference: The reference text. This may include template strings.
33
+ :type reference: str
34
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
35
+ :type credential: ~azure.core.credentials.TokenCredential
36
+ :param kwargs: Additional keyword arguments to pass to the grader.
37
+ :type kwargs: Any
38
+ """
39
+
40
+ id = "azureai://built-in/evaluators/azure-openai/string_check_grader"
41
+ _type = "string_check"
42
+
43
+ def __init__(
44
+ self,
45
+ *,
46
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
47
+ input: str,
48
+ name: str,
49
+ operation: Literal[
50
+ "eq",
51
+ "ne",
52
+ "like",
53
+ "ilike",
54
+ ],
55
+ reference: str,
56
+ credential: Optional[TokenCredential] = None,
57
+ **kwargs: Any
58
+ ):
59
+ grader = StringCheckGrader(
60
+ input=input,
61
+ name=name,
62
+ operation=operation,
63
+ reference=reference,
64
+ type=AzureOpenAIStringCheckGrader._type,
65
+ )
66
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -0,0 +1,80 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Optional, Union
5
+
6
+ from openai.types.graders import TextSimilarityGrader
7
+ from typing_extensions import Literal
8
+
9
+ from azure.ai.evaluation._common._experimental import experimental
10
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
11
+ from azure.core.credentials import TokenCredential
12
+
13
+ from .aoai_grader import AzureOpenAIGrader
14
+
15
+
16
+ @experimental
17
+ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
18
+ """Wrapper class for OpenAI's text similarity graders.
19
+
20
+ Supplying a TextSimilarityGrader to the `evaluate` method will cause an asynchronous request to evaluate
21
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
22
+ evaluation results.
23
+
24
+ :param model_config: The model configuration to use for the grader.
25
+ :type model_config: Union[
26
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
27
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
28
+ :param evaluation_metric: The evaluation metric to use.
29
+ :type evaluation_metric: Literal["fuzzy_match", "bleu", "gleu", "meteor", "rouge_1", "rouge_2", "rouge_3",
30
+ "rouge_4", "rouge_5", "rouge_l", "cosine"]
31
+ :param input: The text being graded.
32
+ :type input: str
33
+ :param pass_threshold: A float score where a value greater than or equal indicates a passing grade.
34
+ :type pass_threshold: float
35
+ :param reference: The text being graded against.
36
+ :type reference: str
37
+ :param name: The name of the grader.
38
+ :type name: str
39
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
40
+ :type credential: ~azure.core.credentials.TokenCredential
41
+ :param kwargs: Additional keyword arguments to pass to the grader.
42
+ :type kwargs: Any
43
+ """
44
+
45
+ id = "azureai://built-in/evaluators/azure-openai/text_similarity_grader"
46
+ _type = "text_similarity"
47
+
48
+ def __init__(
49
+ self,
50
+ *,
51
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
52
+ evaluation_metric: Literal[
53
+ "fuzzy_match",
54
+ "bleu",
55
+ "gleu",
56
+ "meteor",
57
+ "rouge_1",
58
+ "rouge_2",
59
+ "rouge_3",
60
+ "rouge_4",
61
+ "rouge_5",
62
+ "rouge_l",
63
+ "cosine",
64
+ ],
65
+ input: str,
66
+ pass_threshold: float,
67
+ reference: str,
68
+ name: str,
69
+ credential: Optional[TokenCredential] = None,
70
+ **kwargs: Any
71
+ ):
72
+ grader = TextSimilarityGrader(
73
+ evaluation_metric=evaluation_metric,
74
+ input=input,
75
+ pass_threshold=pass_threshold,
76
+ name=name,
77
+ reference=reference,
78
+ type=AzureOpenAITextSimilarityGrader._type,
79
+ )
80
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)