azure-ai-evaluation 1.11.2__tar.gz → 1.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (422)
  1. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/CHANGELOG.md +31 -2
  2. {azure_ai_evaluation-1.11.2/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.13.0}/PKG-INFO +44 -10
  3. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/__init__.py +2 -0
  4. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/aoai_grader.py +69 -28
  5. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/label_grader.py +14 -13
  6. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/python_grader.py +15 -13
  7. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/score_model_grader.py +13 -10
  8. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/string_check_grader.py +13 -13
  9. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/text_similarity_grader.py +16 -25
  10. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/__init__.py +2 -1
  11. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_common/constants.py +194 -0
  12. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/evaluation_onedp_client.py +5 -5
  13. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/__init__.py +2 -2
  14. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_client.py +44 -14
  15. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_configuration.py +9 -7
  16. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_utils/model_base.py +1 -1
  17. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_validation.py +18 -2
  18. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/__init__.py +2 -2
  19. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/_client.py +44 -14
  20. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +9 -7
  21. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +12 -0
  22. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +3942 -1631
  23. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  24. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/models/_enums.py +217 -0
  25. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  26. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/operations/__init__.py +12 -0
  27. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/operations/_operations.py +5422 -2577
  28. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/rai_service.py +299 -2
  29. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/utils.py +173 -39
  30. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_constants.py +218 -0
  31. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_eval_mapping.py +10 -0
  32. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +10 -0
  33. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluate/_evaluate.py +2422 -0
  34. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +330 -51
  35. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_utils.py +17 -6
  36. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluator_definition.py +76 -0
  37. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -0
  38. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -17
  39. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +7 -1
  40. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +80 -4
  41. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  42. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +7 -1
  43. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +28 -13
  44. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  45. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +23 -4
  46. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -7
  47. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +36 -19
  48. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +43 -20
  49. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +7 -1
  50. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +7 -1
  51. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +32 -6
  52. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  53. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  54. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  55. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  56. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  57. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +23 -127
  58. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  59. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  60. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  61. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  62. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  63. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  64. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  65. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  66. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  67. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  68. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  69. azure_ai_evaluation-1.13.0/azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  70. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -19
  71. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_exceptions.py +6 -0
  72. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +14 -1
  73. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +2 -1
  74. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +54 -6
  75. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_model_configurations.py +26 -0
  76. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_version.py +1 -1
  77. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_attack_objective_generator.py +3 -1
  78. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_attack_strategy.py +1 -0
  79. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_callback_chat_target.py +45 -14
  80. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_evaluation_processor.py +129 -12
  81. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_mlflow_integration.py +144 -36
  82. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_orchestrator_manager.py +309 -51
  83. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_red_team.py +503 -37
  84. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_red_team_result.py +264 -15
  85. azure_ai_evaluation-1.13.0/azure/ai/evaluation/red_team/_result_processor.py +1532 -0
  86. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/constants.py +1 -0
  87. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +126 -25
  88. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/metric_mapping.py +10 -7
  89. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +3 -25
  90. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +1 -1
  91. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  92. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +1 -1
  93. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
  94. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -1
  95. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +25 -2
  96. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -0
  97. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +1 -1
  98. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  99. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
  100. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0/azure_ai_evaluation.egg-info}/PKG-INFO +44 -10
  101. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure_ai_evaluation.egg-info/SOURCES.txt +26 -0
  102. azure_ai_evaluation-1.13.0/azure_ai_evaluation.egg-info/requires.txt +30 -0
  103. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/agent_evaluation.ipynb +3 -3
  104. azure_ai_evaluation-1.13.0/samples/agent_evaluators/task_navigation_efficiency.ipynb +578 -0
  105. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/evaluation_samples_evaluate.py +217 -2
  106. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/evaluation_samples_evaluate_fdp.py +218 -3
  107. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/setup.py +11 -6
  108. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/conftest.py +1 -1
  109. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_adv_simulator.py +2 -1
  110. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_aoai_graders.py +126 -3
  111. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_builtin_evaluators.py +6 -7
  112. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_mass_evaluate.py +10 -6
  113. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_prompty_async.py +37 -23
  114. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_red_team.py +1 -0
  115. azure_ai_evaluation-1.13.0/tests/unittests/test_aoai_data_source.py +510 -0
  116. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_aoai_integration_features.py +1 -1
  117. azure_ai_evaluation-1.13.0/tests/unittests/test_aoai_nested_integration.py +289 -0
  118. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_aoai_score_model_grader.py +2 -0
  119. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_built_in_evaluator.py +17 -4
  120. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_completeness_evaluator.py +22 -12
  121. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluate.py +287 -44
  122. azure_ai_evaluation-1.13.0/tests/unittests/test_evaluator_scoring_patterns.py +245 -0
  123. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +0 -1
  124. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_attack_strategy.py +1 -0
  125. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_callback_chat_target.py +13 -52
  126. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_red_team.py +3 -0
  127. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_strategy_utils.py +14 -39
  128. azure_ai_evaluation-1.13.0/tests/unittests/test_task_completion_evaluator.py +377 -0
  129. azure_ai_evaluation-1.13.0/tests/unittests/test_task_navigation_efficiency_evaluators.py +186 -0
  130. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_tool_call_accuracy_evaluator.py +19 -15
  131. azure_ai_evaluation-1.13.0/tests/unittests/test_tool_input_accuracy_evaluator.py +654 -0
  132. azure_ai_evaluation-1.13.0/tests/unittests/test_tool_selection_evaluator.py +286 -0
  133. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_utils.py +93 -0
  134. azure_ai_evaluation-1.11.2/azure/ai/evaluation/_common/constants.py +0 -85
  135. azure_ai_evaluation-1.11.2/azure/ai/evaluation/_common/onedp/models/__init__.py +0 -168
  136. azure_ai_evaluation-1.11.2/azure/ai/evaluation/_common/onedp/models/_models.py +0 -2690
  137. azure_ai_evaluation-1.11.2/azure/ai/evaluation/_constants.py +0 -118
  138. azure_ai_evaluation-1.11.2/azure/ai/evaluation/_evaluate/_evaluate.py +0 -1306
  139. azure_ai_evaluation-1.11.2/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -167
  140. azure_ai_evaluation-1.11.2/azure/ai/evaluation/red_team/_result_processor.py +0 -610
  141. azure_ai_evaluation-1.11.2/azure_ai_evaluation.egg-info/requires.txt +0 -16
  142. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/MANIFEST.in +0 -0
  143. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/NOTICE.txt +0 -0
  144. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/README.md +0 -0
  145. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/TROUBLESHOOTING.md +0 -0
  146. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/__init__.py +0 -0
  147. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/__init__.py +0 -0
  148. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_aoai/__init__.py +0 -0
  149. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  150. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
  151. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_azure/_envs.py +0 -0
  152. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_azure/_models.py +0 -0
  153. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  154. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  155. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/math.py +0 -0
  156. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
  157. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_patch.py +0 -0
  158. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
  159. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
  160. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_utils/__init__.py +0 -0
  161. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_utils/serialization.py +0 -0
  162. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
  163. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/_version.py +0 -0
  164. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/_patch.py +0 -0
  165. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +0 -0
  166. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/models/_patch.py +0 -0
  167. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -0
  168. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
  169. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
  170. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
  171. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
  172. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
  173. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
  174. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
  175. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
  176. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
  177. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
  178. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
  179. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
  180. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
  181. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
  182. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
  183. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
  184. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
  185. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
  186. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  187. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  188. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  189. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  190. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  191. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
  192. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  193. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  194. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  195. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  196. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  197. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  198. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  199. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  200. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  201. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  202. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  203. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  204. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
  205. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  206. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
  207. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_converters/__init__.py +0 -0
  208. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_converters/_ai_services.py +0 -0
  209. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_converters/_models.py +0 -0
  210. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_converters/_sk_services.py +0 -0
  211. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  212. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  213. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  214. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  215. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  216. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  217. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  218. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
  219. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  220. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  221. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  222. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  223. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  224. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  225. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  226. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
  227. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
  228. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  229. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  230. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
  231. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
  232. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
  233. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
  234. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
  235. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +0 -0
  236. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +0 -0
  237. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  238. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  239. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  240. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
  241. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  242. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  243. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  244. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  245. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  246. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  247. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  248. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
  249. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  250. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  251. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  252. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  253. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  254. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
  255. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  256. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  257. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
  258. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  259. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  260. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  261. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
  262. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  263. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  264. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  265. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  266. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  267. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
  268. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  269. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
  270. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  271. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  272. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  273. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_http_utils.py +0 -0
  274. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/__init__.py +0 -0
  275. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
  276. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
  277. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
  278. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
  279. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
  280. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
  281. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
  282. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
  283. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
  284. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
  285. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
  286. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
  287. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  288. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
  289. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +0 -0
  290. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  291. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
  292. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +0 -0
  293. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
  294. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  295. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
  296. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
  297. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
  298. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  299. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_common/__init__.py +0 -0
  300. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +0 -0
  301. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
  302. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +0 -0
  303. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  304. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
  305. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
  306. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  307. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
  308. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  309. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
  310. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_user_agent.py +0 -0
  311. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_vendor/__init__.py +0 -0
  312. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  313. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  314. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  315. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  316. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  317. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/py.typed +0 -0
  318. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/__init__.py +0 -0
  319. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_agent/__init__.py +0 -0
  320. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +0 -0
  321. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +0 -0
  322. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +0 -0
  323. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +0 -0
  324. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
  325. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/__init__.py +0 -0
  326. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +0 -0
  327. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +0 -0
  328. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +0 -0
  329. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/exception_utils.py +0 -0
  330. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/file_utils.py +0 -0
  331. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
  332. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/progress_utils.py +0 -0
  333. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/red_team/_utils/retry_utils.py +0 -0
  334. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  335. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  336. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  337. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  338. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -0
  339. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  340. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  341. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  342. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  343. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  344. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  345. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  346. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  347. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  348. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_simulator.py +0 -0
  349. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  350. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  351. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  352. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  353. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/migration_guide.md +0 -0
  354. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/pyproject.toml +0 -0
  355. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/README.md +0 -0
  356. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/instructions.md +0 -0
  357. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  358. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/response_completeness.ipynb +0 -0
  359. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  360. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/task_adherence.ipynb +0 -0
  361. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
  362. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/agent_evaluators/user_functions.py +0 -0
  363. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/aoai_score_model_grader_sample.py +0 -0
  364. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/data/custom_objectives_with_context_example.json +0 -0
  365. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/data/evaluate_test_data.jsonl +0 -0
  366. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/evaluation_samples_common.py +0 -0
  367. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/evaluation_samples_safety_evaluation.py +0 -0
  368. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/evaluation_samples_simulate.py +0 -0
  369. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/evaluation_samples_threshold.py +0 -0
  370. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/red_team_agent_tool_sample.py +0 -0
  371. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/red_team_samples.py +0 -0
  372. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/red_team_skip_upload.py +0 -0
  373. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/samples/semantic_kernel_red_team_agent_sample.py +0 -0
  374. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/setup.cfg +0 -0
  375. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/__init__.py +0 -0
  376. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/__openai_patcher.py +0 -0
  377. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
  378. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
  379. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
  380. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +0 -0
  381. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +0 -0
  382. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/__init__.py +0 -0
  383. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  384. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/target_fn.py +0 -0
  385. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_evaluate.py +0 -0
  386. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_lite_management_client.py +0 -0
  387. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_metrics_upload.py +0 -0
  388. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_remote_evaluation.py +0 -0
  389. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/e2etests/test_sim_and_eval.py +0 -0
  390. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_agent_evaluators.py +0 -0
  391. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_aoai_alignment_missing_rows.py +0 -0
  392. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_aoai_evaluation_pagination.py +0 -0
  393. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_aoai_python_grader.py +0 -0
  394. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_batch_run_context.py +0 -0
  395. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  396. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
  397. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_document_retrieval_evaluator.py +0 -0
  398. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_eval_run.py +0 -0
  399. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluate_mismatch.py +0 -0
  400. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluate_performance.py +0 -0
  401. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  402. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
  403. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  404. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
  405. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
  406. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  407. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_lazy_imports.py +0 -0
  408. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  409. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/__init__.py +0 -0
  410. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_constants.py +0 -0
  411. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_formatting_utils.py +0 -0
  412. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +0 -0
  413. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_rai_service_target.py +0 -0
  414. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +0 -0
  415. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_red_team_language_support.py +0 -0
  416. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_redteam/test_red_team_result.py +0 -0
  417. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_remote_evaluation_features.py +0 -0
  418. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_safety_evaluation.py +0 -0
  419. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_save_eval.py +0 -0
  420. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_simulator.py +0 -0
  421. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  422. {azure_ai_evaluation-1.11.2 → azure_ai_evaluation-1.13.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
@@ -1,17 +1,46 @@
1
1
  # Release History
2
2
 
3
+ ## 1.13.0 (2025-10-30)
4
+
5
+ ### Features Added
6
+
7
+ - Updated `IndirectAttack` risk category for RedTeam to `IndirectJailbreak` to better reflect its purpose. This change allows users to apply cross-domain prompt injection (XPIA) attack strategies across all risk categories, enabling more comprehensive security testing of AI systems against indirect prompt injection attacks during red teaming.
8
+ - Added `TaskAdherence`, `SensitiveDataLeakage`, and `ProhibitedActions` as cloud-only agent safety risk categories for red teaming.
9
+ - Updated all evaluators' output to be of the following schema:
10
+ - `gpt_{evaluator_name}`, `{evaluator_name}`: float score,
11
+ - `{evaluator_name}_result`: pass/fail based on threshold,
12
+ - `{evaluator_name}_reason`, `{evaluator_name}_threshold`
13
+ - `{evaluator_name}_prompt_tokens`, `{evaluator_name}_completion_tokens`, `{evaluator_name}_total_tokens`, `{evaluator_name}_finish_reason`
14
+ - `{evaluator_name}_model`: model used for evaluation
15
+ - `{evaluator_name}_sample_input`, `{evaluator_name}_sample_output`: input and output used for evaluation
16
+
17
+ This change standardizes the output format across all evaluators and follows OpenTelemetry (OTel) conventions.
18
+
19
+ ### Bugs Fixed
20
+
21
+ - `image_tag` parameter in `AzureOpenAIPythonGrader` is now optional.
22
+
23
+ ## 1.12.0 (2025-10-02)
24
+
25
+ ### Features Added
26
+ - AOAI Graders now accept a `credential` parameter that can be used for authentication with an `AzureOpenAIModelConfiguration`.
27
+ - Added `is_reasoning_model` parameter support to `CoherenceEvaluator`, `FluencyEvaluator`, `SimilarityEvaluator`, `GroundednessEvaluator`, `RetrievalEvaluator`, and `RelevanceEvaluator` to enable reasoning model configuration for o1/o3 models.
28
+
29
+ ### Bugs Fixed
30
+ - Support for multi-level nesting in OpenAI grader (experimental)
31
+
3
32
  ## 1.11.2 (2025-10-09)
4
33
 
5
34
  ### Bugs Fixed
6
35
 
7
36
  - `**kwargs` in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
8
37
 
9
- ## 1.11.1 (2025-09-17)
38
+ ## 1.11.1 (2025-09-19)
10
39
 
11
40
  ### Bugs Fixed
12
41
  - Pinning duckdb version to 1.3.2 for redteam extra to fix error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`
13
42
 
14
- ## 1.11.0 (2025-09-02)
43
+ ## 1.11.0 (2025-09-03)
15
44
 
16
45
  ### Features Added
17
46
  - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: azure-ai-evaluation
3
- Version: 1.11.2
3
+ Version: 1.13.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -22,20 +22,25 @@ Requires-Python: >=3.9
22
22
  Description-Content-Type: text/markdown
23
23
  License-File: NOTICE.txt
24
24
  Requires-Dist: pyjwt>=2.8.0
25
- Requires-Dist: azure-identity>=1.16.0
26
- Requires-Dist: azure-core>=1.30.2
25
+ Requires-Dist: azure-identity>=1.19.0
26
+ Requires-Dist: azure-core>=1.31.0
27
27
  Requires-Dist: nltk>=3.9.1
28
- Requires-Dist: azure-storage-blob>=12.10.0
29
- Requires-Dist: httpx>=0.25.1
30
- Requires-Dist: pandas<3.0.0,>=2.1.2
28
+ Requires-Dist: azure-storage-blob>=12.19.0
29
+ Requires-Dist: httpx>=0.27.2
30
+ Requires-Dist: pandas<3.0.0,>=2.1.2; python_version < "3.13"
31
+ Requires-Dist: pandas<3.0.0,>=2.2.3; python_version == "3.13"
32
+ Requires-Dist: pandas<3.0.0,>=2.3.3; python_version >= "3.14"
31
33
  Requires-Dist: openai>=1.108.0
32
34
  Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
33
35
  Requires-Dist: msrest>=0.6.21
34
36
  Requires-Dist: Jinja2>=3.1.6
35
37
  Requires-Dist: aiohttp>=3.0
36
38
  Provides-Extra: redteam
37
- Requires-Dist: pyrit==0.8.1; extra == "redteam"
38
- Requires-Dist: duckdb==1.3.2; extra == "redteam"
39
+ Requires-Dist: pyrit==0.8.1; python_version >= "3.10" and extra == "redteam"
40
+ Requires-Dist: duckdb==1.3.2; python_version >= "3.10" and extra == "redteam"
41
+ Provides-Extra: opentelemetry
42
+ Requires-Dist: opentelemetry-sdk>=1.17.0; extra == "opentelemetry"
43
+ Requires-Dist: azure-monitor-opentelemetry-exporter>=1.0.0b17; extra == "opentelemetry"
39
44
  Dynamic: author
40
45
  Dynamic: author-email
41
46
  Dynamic: classifier
@@ -413,18 +418,47 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
413
418
 
414
419
  # Release History
415
420
 
421
+ ## 1.13.0 (2025-10-30)
422
+
423
+ ### Features Added
424
+
425
+ - Updated `IndirectAttack` risk category for RedTeam to `IndirectJailbreak` to better reflect its purpose. This change allows users to apply cross-domain prompt injection (XPIA) attack strategies across all risk categories, enabling more comprehensive security testing of AI systems against indirect prompt injection attacks during red teaming.
426
+ - Added `TaskAdherence`, `SensitiveDataLeakage`, and `ProhibitedActions` as cloud-only agent safety risk categories for red teaming.
427
+ - Updated all evaluators' output to be of the following schema:
428
+ - `gpt_{evaluator_name}`, `{evaluator_name}`: float score,
429
+ - `{evaluator_name}_result`: pass/fail based on threshold,
430
+ - `{evaluator_name}_reason`, `{evaluator_name}_threshold`
431
+ - `{evaluator_name}_prompt_tokens`, `{evaluator_name}_completion_tokens`, `{evaluator_name}_total_tokens`, `{evaluator_name}_finish_reason`
432
+ - `{evaluator_name}_model`: model used for evaluation
433
+ - `{evaluator_name}_sample_input`, `{evaluator_name}_sample_output`: input and output used for evaluation
434
+
435
+ This change standardizes the output format across all evaluators and follows OpenTelemetry (OTel) conventions.
436
+
437
+ ### Bugs Fixed
438
+
439
+ - `image_tag` parameter in `AzureOpenAIPythonGrader` is now optional.
440
+
441
+ ## 1.12.0 (2025-10-02)
442
+
443
+ ### Features Added
444
+ - AOAI Graders now accept a `credential` parameter that can be used for authentication with an `AzureOpenAIModelConfiguration`.
445
+ - Added `is_reasoning_model` parameter support to `CoherenceEvaluator`, `FluencyEvaluator`, `SimilarityEvaluator`, `GroundednessEvaluator`, `RetrievalEvaluator`, and `RelevanceEvaluator` to enable reasoning model configuration for o1/o3 models.
446
+
447
+ ### Bugs Fixed
448
+ - Support for multi-level nesting in OpenAI grader (experimental)
449
+
416
450
  ## 1.11.2 (2025-10-09)
417
451
 
418
452
  ### Bugs Fixed
419
453
 
420
454
  - `**kwargs` in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
421
455
 
422
- ## 1.11.1 (2025-09-17)
456
+ ## 1.11.1 (2025-09-19)
423
457
 
424
458
  ### Bugs Fixed
425
459
  - Pinning duckdb version to 1.3.2 for redteam extra to fix error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`
426
460
 
427
- ## 1.11.0 (2025-09-02)
461
+ ## 1.11.0 (2025-09-03)
428
462
 
429
463
  ### Features Added
430
464
  - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
@@ -32,6 +32,7 @@ from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
32
32
  from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
33
33
  from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
34
34
  from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
35
+ from ._evaluators._tool_output_utilization import _ToolOutputUtilizationEvaluator
35
36
  from ._model_configurations import (
36
37
  AzureAIProject,
37
38
  AzureOpenAIModelConfiguration,
@@ -131,6 +132,7 @@ __all__ = [
131
132
  "CodeVulnerabilityEvaluator",
132
133
  "UngroundedAttributesEvaluator",
133
134
  "ToolCallAccuracyEvaluator",
135
+ "_ToolOutputUtilizationEvaluator",
134
136
  "AzureOpenAIGrader",
135
137
  "AzureOpenAILabelGrader",
136
138
  "AzureOpenAIStringCheckGrader",
@@ -1,19 +1,26 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
4
+ from typing import TYPE_CHECKING, Any, Dict, Optional, Union
5
+
6
+ from typing_extensions import TypeIs
5
7
 
6
- from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION, TokenScope
7
10
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
11
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
8
12
  from azure.ai.evaluation._user_agent import UserAgentSingleton
9
- from typing import Any, Dict, Union
10
- from azure.ai.evaluation._common._experimental import experimental
13
+ from azure.core.credentials import TokenCredential
14
+
15
+ if TYPE_CHECKING:
16
+ from openai.lib.azure import AzureADTokenProvider
11
17
 
12
18
 
13
19
  @experimental
14
20
  class AzureOpenAIGrader:
15
- """
16
- Base class for Azure OpenAI grader wrappers, recommended only for use by experienced OpenAI API users.
21
+ """Base class for Azure OpenAI grader wrappers.
22
+
23
+ Recommended only for use by experienced OpenAI API users.
17
24
  Combines a model configuration and any grader configuration
18
25
  into a singular object that can be used in evaluations.
19
26
 
@@ -22,18 +29,16 @@ class AzureOpenAIGrader:
22
29
  evaluation results.
23
30
 
24
31
  :param model_config: The model configuration to use for the grader.
25
- :type model_config: Union[
26
- ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
27
- ~azure.ai.evaluation.OpenAIModelConfiguration
28
- ]
32
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
33
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
29
34
  :param grader_config: The grader configuration to use for the grader. This is expected
30
35
  to be formatted as a dictionary that matches the specifications of the sub-types of
31
- the TestingCriterion alias specified in (OpenAI's SDK)[https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L151].
36
+ the TestingCriterion alias specified in `OpenAI's SDK <https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L151>`_.
32
37
  :type grader_config: Dict[str, Any]
38
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
39
+ :type credential: ~azure.core.credentials.TokenCredential
33
40
  :param kwargs: Additional keyword arguments to pass to the grader.
34
41
  :type kwargs: Any
35
-
36
-
37
42
  """
38
43
 
39
44
  id = "azureai://built-in/evaluators/azure-openai/custom_grader"
@@ -43,10 +48,12 @@ class AzureOpenAIGrader:
43
48
  *,
44
49
  model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
45
50
  grader_config: Dict[str, Any],
51
+ credential: Optional[TokenCredential] = None,
46
52
  **kwargs: Any,
47
53
  ):
48
54
  self._model_config = model_config
49
55
  self._grader_config = grader_config
56
+ self._credential = credential
50
57
 
51
58
  if kwargs.get("validate", True):
52
59
  self._validate_model_config()
@@ -54,20 +61,39 @@ class AzureOpenAIGrader:
54
61
 
55
62
  def _validate_model_config(self) -> None:
56
63
  """Validate the model configuration that this grader wrapper is using."""
57
- if "api_key" not in self._model_config or not self._model_config.get("api_key"):
58
- msg = f"{type(self).__name__}: Requires an api_key in the supplied model_config."
59
- raise EvaluationException(
60
- message=msg,
61
- blame=ErrorBlame.USER_ERROR,
62
- category=ErrorCategory.INVALID_VALUE,
63
- target=ErrorTarget.AOAI_GRADER,
64
- )
64
+ msg = None
65
+ if self._is_azure_model_config(self._model_config):
66
+ if not any(auth for auth in (self._model_config.get("api_key"), self._credential)):
67
+ msg = (
68
+ f"{type(self).__name__}: Requires an api_key in the supplied model_config, "
69
+ + "or providing a credential to the grader's __init__ method. "
70
+ )
71
+
72
+ else:
73
+ if "api_key" not in self._model_config or not self._model_config.get("api_key"):
74
+ msg = f"{type(self).__name__}: Requires an api_key in the supplied model_config."
75
+
76
+ if msg is None:
77
+ return
78
+
79
+ raise EvaluationException(
80
+ message=msg,
81
+ blame=ErrorBlame.USER_ERROR,
82
+ category=ErrorCategory.INVALID_VALUE,
83
+ target=ErrorTarget.AOAI_GRADER,
84
+ )
65
85
 
66
86
  def _validate_grader_config(self) -> None:
67
87
  """Validate the grader configuration that this grader wrapper is using."""
68
88
 
69
89
  return
70
90
 
91
+ @staticmethod
92
+ def _is_azure_model_config(
93
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
94
+ ) -> TypeIs[AzureOpenAIModelConfiguration]:
95
+ return "azure_endpoint" in model_config
96
+
71
97
  def get_client(self) -> Any:
72
98
  """Construct an appropriate OpenAI client using this grader's model configuration.
73
99
  Returns a slightly different client depending on whether or not this grader's model
@@ -77,23 +103,38 @@ class AzureOpenAIGrader:
77
103
  :rtype: [~openai.OpenAI, ~openai.AzureOpenAI]
78
104
  """
79
105
  default_headers = {"User-Agent": UserAgentSingleton().value}
80
- if "azure_endpoint" in self._model_config:
106
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration] = self._model_config
107
+ api_key: Optional[str] = model_config.get("api_key")
108
+
109
+ if self._is_azure_model_config(model_config):
81
110
  from openai import AzureOpenAI
82
111
 
83
112
  # TODO set default values?
84
113
  return AzureOpenAI(
85
- azure_endpoint=self._model_config["azure_endpoint"],
86
- api_key=self._model_config.get("api_key", None), # Default-style access to appease linters.
114
+ azure_endpoint=model_config["azure_endpoint"],
115
+ api_key=api_key, # Default-style access to appease linters.
87
116
  api_version=DEFAULT_AOAI_API_VERSION, # Force a known working version
88
- azure_deployment=self._model_config.get("azure_deployment", ""),
117
+ azure_deployment=model_config.get("azure_deployment", ""),
118
+ azure_ad_token_provider=self._get_token_provider(self._credential) if not api_key else None,
89
119
  default_headers=default_headers,
90
120
  )
91
121
  from openai import OpenAI
92
122
 
93
123
  # TODO add default values for base_url and organization?
94
124
  return OpenAI(
95
- api_key=self._model_config["api_key"],
96
- base_url=self._model_config.get("base_url", ""),
97
- organization=self._model_config.get("organization", ""),
125
+ api_key=api_key,
126
+ base_url=model_config.get("base_url", ""),
127
+ organization=model_config.get("organization", ""),
98
128
  default_headers=default_headers,
99
129
  )
130
+
131
+ @staticmethod
132
+ def _get_token_provider(cred: TokenCredential) -> "AzureADTokenProvider":
133
+ """Get the token provider for the AzureOpenAI client.
134
+
135
+ :param TokenCredential cred: The Azure authentication credential.
136
+ :return: A zero-argument callable that requests a token from the credential and returns its token string.
137
+ :rtype: openai.lib.azure.AzureADTokenProvider
138
+ """
139
+
140
+ return lambda: cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT).token
@@ -1,32 +1,31 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- from typing import Any, Dict, Union, List
4
+ from typing import Any, Dict, List, Optional, Union
5
5
 
6
- from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
6
  from openai.types.graders import LabelModelGrader
7
+
8
8
  from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
10
+ from azure.core.credentials import TokenCredential
9
11
 
10
12
  from .aoai_grader import AzureOpenAIGrader
11
13
 
12
14
 
13
15
  @experimental
14
16
  class AzureOpenAILabelGrader(AzureOpenAIGrader):
15
- """
16
- Wrapper class for OpenAI's label model graders.
17
+ """Wrapper class for OpenAI's label model graders.
17
18
 
18
19
  Supplying a LabelGrader to the `evaluate` method will cause an asynchronous request to evaluate
19
20
  the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
20
21
  evaluation results.
21
22
 
22
23
  :param model_config: The model configuration to use for the grader.
23
- :type model_config: Union[
24
- ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
25
- ~azure.ai.evaluation.OpenAIModelConfiguration
26
- ]
24
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
25
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
27
26
  :param input: The list of label-based testing criterion for this grader. Individual
28
27
  values of this list are expected to be dictionaries that match the format of any of the valid
29
- (TestingCriterionLabelModelInput)[https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L125C1-L125C32]
28
+ `TestingCriterionLabelModelInput <https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L125C1-L125C32>`_
30
29
  subtypes.
31
30
  :type input: List[Dict[str, str]]
32
31
  :param labels: A list of strings representing the classification labels of this grader.
@@ -37,13 +36,14 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
37
36
  :type name: str
38
37
  :param passing_labels: The labels that indicate a passing result. Must be a subset of labels.
39
38
  :type passing_labels: List[str]
39
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
40
+ :type credential: ~azure.core.credentials.TokenCredential
40
41
  :param kwargs: Additional keyword arguments to pass to the grader.
41
42
  :type kwargs: Any
42
-
43
-
44
43
  """
45
44
 
46
45
  id = "azureai://built-in/evaluators/azure-openai/label_grader"
46
+ _type = "label_model"
47
47
 
48
48
  def __init__(
49
49
  self,
@@ -54,6 +54,7 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
54
54
  model: str,
55
55
  name: str,
56
56
  passing_labels: List[str],
57
+ credential: Optional[TokenCredential] = None,
57
58
  **kwargs: Any
58
59
  ):
59
60
  grader = LabelModelGrader(
@@ -62,6 +63,6 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
62
63
  model=model,
63
64
  name=name,
64
65
  passing_labels=passing_labels,
65
- type="label_model",
66
+ type=AzureOpenAILabelGrader._type,
66
67
  )
67
- super().__init__(model_config=model_config, grader_config=grader, **kwargs)
68
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -1,19 +1,20 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- from typing import Any, Dict, Union, Optional
4
+ from typing import Any, Dict, Optional, Union
5
5
 
6
- from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
6
  from openai.types.graders import PythonGrader
7
+
8
8
  from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
10
+ from azure.core.credentials import TokenCredential
9
11
 
10
12
  from .aoai_grader import AzureOpenAIGrader
11
13
 
12
14
 
13
15
  @experimental
14
16
  class AzureOpenAIPythonGrader(AzureOpenAIGrader):
15
- """
16
- Wrapper class for OpenAI's Python code graders.
17
+ """Wrapper class for OpenAI's Python code graders.
17
18
 
18
19
  Enables custom Python-based evaluation logic with flexible scoring and
19
20
  pass/fail thresholds. The grader executes user-provided Python code
@@ -25,20 +26,19 @@ class AzureOpenAIPythonGrader(AzureOpenAIGrader):
25
26
  evaluation results.
26
27
 
27
28
  :param model_config: The model configuration to use for the grader.
28
- :type model_config: Union[
29
- ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
- ~azure.ai.evaluation.OpenAIModelConfiguration
31
- ]
29
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
32
31
  :param name: The name of the grader.
33
32
  :type name: str
34
33
  :param image_tag: The image tag for the Python execution environment.
35
34
  :type image_tag: str
36
- :param pass_threshold: Score threshold for pass/fail classification.
37
- Scores >= threshold are considered passing.
35
+ :param pass_threshold: Score threshold for pass/fail classification. Scores >= threshold are considered passing.
38
36
  :type pass_threshold: float
39
37
  :param source: Python source code containing the grade function.
40
38
  Must define: def grade(sample: dict, item: dict) -> float
41
39
  :type source: str
40
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
41
+ :type credential: ~azure.core.credentials.TokenCredential
42
42
  :param kwargs: Additional keyword arguments to pass to the grader.
43
43
  :type kwargs: Any
44
44
 
@@ -54,15 +54,17 @@ class AzureOpenAIPythonGrader(AzureOpenAIGrader):
54
54
  """
55
55
 
56
56
  id = "azureai://built-in/evaluators/azure-openai/python_grader"
57
+ _type = "python"
57
58
 
58
59
  def __init__(
59
60
  self,
60
61
  *,
61
62
  model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
62
63
  name: str,
63
- image_tag: str,
64
64
  pass_threshold: float,
65
65
  source: str,
66
+ image_tag: Optional[str] = None,
67
+ credential: Optional[TokenCredential] = None,
66
68
  **kwargs: Any,
67
69
  ):
68
70
  # Validate pass_threshold
@@ -78,7 +80,7 @@ class AzureOpenAIPythonGrader(AzureOpenAIGrader):
78
80
  image_tag=image_tag,
79
81
  pass_threshold=pass_threshold,
80
82
  source=source,
81
- type="python",
83
+ type=AzureOpenAIPythonGrader._type,
82
84
  )
83
85
 
84
- super().__init__(model_config=model_config, grader_config=grader, **kwargs)
86
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -1,19 +1,20 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- from typing import Any, Dict, Union, List, Optional
4
+ from typing import Any, Dict, List, Optional, Union
5
5
 
6
- from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
6
  from openai.types.graders import ScoreModelGrader
7
+
8
8
  from azure.ai.evaluation._common._experimental import experimental
9
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
10
+ from azure.core.credentials import TokenCredential
9
11
 
10
12
  from .aoai_grader import AzureOpenAIGrader
11
13
 
12
14
 
13
15
  @experimental
14
16
  class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
15
- """
16
- Wrapper class for OpenAI's score model graders.
17
+ """Wrapper class for OpenAI's score model graders.
17
18
 
18
19
  Enables continuous scoring evaluation with custom prompts and flexible
19
20
  conversation-style inputs. Supports configurable score ranges and
@@ -25,10 +26,8 @@ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
25
26
  evaluation results.
26
27
 
27
28
  :param model_config: The model configuration to use for the grader.
28
- :type model_config: Union[
29
- ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
- ~azure.ai.evaluation.OpenAIModelConfiguration
31
- ]
29
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
+ ~azure.ai.evaluation.OpenAIModelConfiguration]
32
31
  :param input: The input messages for the grader. List of conversation
33
32
  messages with role and content.
34
33
  :type input: List[Dict[str, str]]
@@ -43,11 +42,14 @@ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
43
42
  :type pass_threshold: Optional[float]
44
43
  :param sampling_params: The sampling parameters for the model.
45
44
  :type sampling_params: Optional[Dict[str, Any]]
45
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
46
+ :type credential: ~azure.core.credentials.TokenCredential
46
47
  :param kwargs: Additional keyword arguments to pass to the grader.
47
48
  :type kwargs: Any
48
49
  """
49
50
 
50
51
  id = "azureai://built-in/evaluators/azure-openai/score_model_grader"
52
+ _type = "score_model"
51
53
 
52
54
  def __init__(
53
55
  self,
@@ -59,6 +61,7 @@ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
59
61
  range: Optional[List[float]] = None,
60
62
  pass_threshold: Optional[float] = None,
61
63
  sampling_params: Optional[Dict[str, Any]] = None,
64
+ credential: Optional[TokenCredential] = None,
62
65
  **kwargs: Any,
63
66
  ):
64
67
  # Validate range and pass_threshold
@@ -78,7 +81,7 @@ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
78
81
  self.pass_threshold = pass_threshold
79
82
 
80
83
  # Create OpenAI ScoreModelGrader instance
81
- grader_kwargs = {"input": input, "model": model, "name": name, "type": "score_model"}
84
+ grader_kwargs = {"input": input, "model": model, "name": name, "type": AzureOpenAIScoreModelGrader._type}
82
85
 
83
86
  if range is not None:
84
87
  grader_kwargs["range"] = range
@@ -88,4 +91,4 @@ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
88
91
 
89
92
  grader = ScoreModelGrader(**grader_kwargs)
90
93
 
91
- super().__init__(model_config=model_config, grader_config=grader, **kwargs)
94
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)
@@ -1,30 +1,28 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
- from typing import Any, Dict, Union
5
- from typing_extensions import Literal
4
+ from typing import Any, Dict, Optional, Union
6
5
 
7
- from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
8
6
  from openai.types.graders import StringCheckGrader
7
+ from typing_extensions import Literal
8
+
9
9
  from azure.ai.evaluation._common._experimental import experimental
10
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
11
+ from azure.core.credentials import TokenCredential
10
12
 
11
13
  from .aoai_grader import AzureOpenAIGrader
12
14
 
13
15
 
14
16
  @experimental
15
17
  class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
16
- """
17
- Wrapper class for OpenAI's string check graders.
18
+ """Wrapper class for OpenAI's string check graders.
18
19
 
19
20
  Supplying a StringCheckGrader to the `evaluate` method will cause an asynchronous request to evaluate
20
21
  the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
21
22
  evaluation results.
22
23
 
23
24
  :param model_config: The model configuration to use for the grader.
24
- :type model_config: Union[
25
- ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
26
- ~azure.ai.evaluation.OpenAIModelConfiguration
27
- ]
25
+ :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,~azure.ai.evaluation.OpenAIModelConfiguration]
28
26
  :param input: The input text. This may include template strings.
29
27
  :type input: str
30
28
  :param name: The name of the grader.
@@ -33,13 +31,14 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
33
31
  :type operation: Literal["eq", "ne", "like", "ilike"]
34
32
  :param reference: The reference text. This may include template strings.
35
33
  :type reference: str
34
+ :param credential: The credential to use to authenticate to the model. Only applicable to AzureOpenAI models.
35
+ :type credential: ~azure.core.credentials.TokenCredential
36
36
  :param kwargs: Additional keyword arguments to pass to the grader.
37
37
  :type kwargs: Any
38
-
39
-
40
38
  """
41
39
 
42
40
  id = "azureai://built-in/evaluators/azure-openai/string_check_grader"
41
+ _type = "string_check"
43
42
 
44
43
  def __init__(
45
44
  self,
@@ -54,6 +53,7 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
54
53
  "ilike",
55
54
  ],
56
55
  reference: str,
56
+ credential: Optional[TokenCredential] = None,
57
57
  **kwargs: Any
58
58
  ):
59
59
  grader = StringCheckGrader(
@@ -61,6 +61,6 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
61
61
  name=name,
62
62
  operation=operation,
63
63
  reference=reference,
64
- type="string_check",
64
+ type=AzureOpenAIStringCheckGrader._type,
65
65
  )
66
- super().__init__(model_config=model_config, grader_config=grader, **kwargs)
66
+ super().__init__(model_config=model_config, grader_config=grader, credential=credential, **kwargs)