azure-ai-evaluation 1.8.0__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383)
  1. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/CHANGELOG.md +14 -0
  2. {azure_ai_evaluation-1.8.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.9.0}/PKG-INFO +15 -1
  3. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/__init__.py +13 -2
  4. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_aoai/__init__.py +1 -1
  5. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
  6. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_aoai/label_grader.py +3 -2
  7. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_aoai/score_model_grader.py +90 -0
  8. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
  9. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
  10. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_azure/_envs.py +9 -10
  11. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_azure/_token_manager.py +7 -1
  12. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/constants.py +11 -2
  13. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
  14. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/__init__.py +32 -32
  15. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_client.py +136 -139
  16. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
  17. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp/models → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp}/_patch.py +21 -21
  18. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  19. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  20. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  21. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_validation.py +50 -50
  22. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_version.py +9 -9
  23. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
  24. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
  25. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
  26. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp/aio/operations → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/aio}/_patch.py +21 -21
  27. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/aio}/operations/__init__.py +37 -39
  28. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
  29. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/aio/operations}/_patch.py +21 -21
  30. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
  31. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
  32. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
  33. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp/aio → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/models}/_patch.py +21 -21
  34. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp/aio → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp}/operations/__init__.py +37 -39
  35. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
  36. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/rai_service.py +86 -50
  38. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
  39. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
  40. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/utils.py +124 -3
  41. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_constants.py +2 -1
  42. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_converters/_ai_services.py +9 -8
  43. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_converters/_models.py +46 -0
  44. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_converters/_sk_services.py +495 -0
  45. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_eval_mapping.py +2 -2
  46. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +4 -4
  47. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
  48. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_evaluate.py +60 -54
  49. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +130 -89
  50. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
  51. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_utils.py +24 -15
  52. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +3 -3
  53. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +12 -11
  54. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -5
  55. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +15 -5
  56. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
  57. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +6 -1
  58. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +13 -13
  59. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +7 -7
  60. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +7 -7
  61. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +7 -7
  62. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +6 -6
  63. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
  64. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +34 -64
  65. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -3
  66. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +4 -4
  67. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -2
  68. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +3 -3
  69. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -7
  70. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +30 -25
  71. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  72. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +2 -3
  73. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +6 -6
  74. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -4
  75. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +8 -13
  76. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -25
  77. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +4 -4
  78. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +21 -21
  79. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +5 -5
  80. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -3
  81. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -14
  82. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +43 -34
  83. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +3 -3
  84. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +12 -11
  85. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +6 -6
  86. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_exceptions.py +10 -0
  87. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_http_utils.py +3 -3
  88. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +3 -3
  89. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
  90. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +5 -10
  91. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
  92. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
  93. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
  94. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
  95. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +193 -111
  96. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_user_agent.py +37 -0
  97. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_vendor/__init__.py +3 -0
  98. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_version.py +1 -1
  99. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/__init__.py +3 -1
  100. azure_ai_evaluation-1.9.0/azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  101. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
  102. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
  103. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
  104. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
  105. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
  106. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
  107. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
  108. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_default_converter.py +1 -1
  109. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_red_team.py +1286 -739
  110. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_red_team_result.py +43 -38
  111. azure_ai_evaluation-1.9.0/azure/ai/evaluation/red_team/_utils/__init__.py +3 -0
  112. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +32 -32
  113. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
  114. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
  115. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/constants.py +2 -12
  116. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
  117. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
  118. azure_ai_evaluation-1.9.0/azure/ai/evaluation/red_team/_utils/metric_mapping.py +50 -0
  119. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
  120. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
  121. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +26 -15
  122. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
  123. azure_ai_evaluation-1.9.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  124. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
  125. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +5 -5
  126. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
  127. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
  128. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +10 -8
  129. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
  130. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
  131. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
  132. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_simulator.py +9 -8
  133. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0/azure_ai_evaluation.egg-info}/PKG-INFO +15 -1
  134. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure_ai_evaluation.egg-info/SOURCES.txt +10 -1
  135. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/pyproject.toml +2 -2
  136. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/user_functions.py +9 -2
  137. azure_ai_evaluation-1.9.0/samples/aoai_score_model_grader_sample.py +257 -0
  138. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/evaluation_samples_evaluate.py +62 -72
  139. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/evaluation_samples_evaluate_fdp.py +99 -92
  140. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/evaluation_samples_safety_evaluation.py +118 -85
  141. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/evaluation_samples_threshold.py +35 -58
  142. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/red_team_agent_tool_sample.py +16 -17
  143. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/red_team_samples.py +106 -126
  144. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/red_team_skip_upload.py +15 -9
  145. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/semantic_kernel_red_team_agent_sample.py +13 -17
  146. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/setup.py +1 -5
  147. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/conftest.py +12 -1
  148. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/converters/ai_agent_converter/serialization_helper.py +34 -54
  149. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +17 -6
  150. azure_ai_evaluation-1.9.0/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +67 -0
  151. azure_ai_evaluation-1.9.0/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +128 -0
  152. azure_ai_evaluation-1.9.0/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +112 -0
  153. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_adv_simulator.py +6 -6
  154. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_aoai_graders.py +129 -38
  155. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_builtin_evaluators.py +178 -105
  156. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_evaluate.py +60 -0
  157. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_mass_evaluate.py +8 -8
  158. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_metrics_upload.py +2 -0
  159. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_remote_evaluation.py +3 -5
  160. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_sim_and_eval.py +46 -41
  161. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_agent_evaluators.py +28 -43
  162. azure_ai_evaluation-1.9.0/tests/unittests/test_aoai_evaluation_pagination.py +244 -0
  163. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_aoai_integration_features.py +17 -26
  164. azure_ai_evaluation-1.9.0/tests/unittests/test_aoai_score_model_grader.py +951 -0
  165. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_batch_run_context.py +2 -2
  166. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_completeness_evaluator.py +29 -16
  167. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_document_retrieval_evaluator.py +106 -57
  168. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluate.py +20 -14
  169. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +28 -106
  170. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +45 -68
  171. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +91 -63
  172. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/__init__.py +3 -2
  173. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +34 -49
  174. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_attack_strategy.py +4 -8
  175. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_callback_chat_target.py +22 -27
  176. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_constants.py +7 -23
  177. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_formatting_utils.py +36 -40
  178. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +33 -16
  179. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_rai_service_target.py +108 -52
  180. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +17 -8
  181. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_red_team.py +476 -459
  182. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_red_team_result.py +28 -41
  183. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_redteam/test_strategy_utils.py +41 -58
  184. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_remote_evaluation_features.py +10 -5
  185. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_safety_evaluation.py +9 -18
  186. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_save_eval.py +5 -2
  187. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_tool_call_accuracy_evaluator.py +47 -76
  188. azure_ai_evaluation-1.9.0/tests/unittests/test_utils.py +636 -0
  189. azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
  190. azure_ai_evaluation-1.8.0/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -21
  191. azure_ai_evaluation-1.8.0/azure/ai/evaluation/_converters/__init__.py +0 -3
  192. azure_ai_evaluation-1.8.0/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -161
  193. azure_ai_evaluation-1.8.0/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -3
  194. azure_ai_evaluation-1.8.0/azure/ai/evaluation/_user_agent.py +0 -6
  195. azure_ai_evaluation-1.8.0/azure/ai/evaluation/red_team/_agent/__init__.py +0 -3
  196. azure_ai_evaluation-1.8.0/azure/ai/evaluation/red_team/_utils/__init__.py +0 -3
  197. azure_ai_evaluation-1.8.0/azure/ai/evaluation/red_team/_utils/metric_mapping.py +0 -23
  198. azure_ai_evaluation-1.8.0/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -35
  199. azure_ai_evaluation-1.8.0/tests/unittests/test_utils.py +0 -258
  200. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/MANIFEST.in +0 -0
  201. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/NOTICE.txt +0 -0
  202. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/README.md +0 -0
  203. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/TROUBLESHOOTING.md +0 -0
  204. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/__init__.py +0 -0
  205. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/__init__.py +0 -0
  206. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  207. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
  208. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_azure/_models.py +0 -0
  209. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/__init__.py +0 -0
  210. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  211. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/math.py +0 -0
  212. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
  213. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
  214. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
  215. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
  216. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
  217. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
  218. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
  219. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
  220. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
  221. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
  222. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
  223. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
  224. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
  225. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
  226. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
  227. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
  228. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
  229. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
  230. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
  231. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
  232. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
  233. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  234. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  235. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  236. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  237. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  238. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
  239. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  240. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  241. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  242. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  243. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  244. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  245. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  246. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  247. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  248. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  249. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  250. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  251. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  252. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
  253. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_evaluate → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_converters}/__init__.py +0 -0
  254. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_evaluators → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_evaluate}/__init__.py +0 -0
  255. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  256. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  257. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  258. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  259. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  260. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
  261. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_legacy → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_evaluators}/__init__.py +0 -0
  262. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  263. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  264. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  265. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  266. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  267. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
  268. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  269. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  270. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  271. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  272. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  273. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  274. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  275. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  276. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  277. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  278. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  279. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  280. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  281. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  282. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  283. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  284. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  285. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
  286. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  287. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  288. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  289. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  290. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  291. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  292. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  293. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
  294. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  295. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  296. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  297. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_legacy/_common → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_legacy}/__init__.py +0 -0
  298. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
  299. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
  300. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
  301. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
  302. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
  303. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
  304. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
  305. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
  306. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
  307. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
  308. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
  309. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
  310. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  311. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
  312. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  313. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +0 -0
  314. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
  315. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  316. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
  317. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
  318. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  319. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_legacy/_common}/__init__.py +0 -0
  320. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
  321. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  322. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
  323. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
  324. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
  325. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  326. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  327. {azure_ai_evaluation-1.8.0/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.9.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
  328. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  329. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  330. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  331. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  332. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  333. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  334. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/py.typed +0 -0
  335. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  336. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  337. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
  338. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  339. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  340. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  341. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  342. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  343. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  344. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  345. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  346. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  347. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  348. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  349. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  350. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure_ai_evaluation.egg-info/requires.txt +0 -0
  351. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  352. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/migration_guide.md +0 -0
  353. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/README.md +0 -0
  354. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
  355. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/instructions.md +0 -0
  356. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  357. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/response_completeness.ipynb +0 -0
  358. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  359. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/task_adherence.ipynb +0 -0
  360. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
  361. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/data/evaluate_test_data.jsonl +0 -0
  362. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/evaluation_samples_common.py +0 -0
  363. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/samples/evaluation_samples_simulate.py +0 -0
  364. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/setup.cfg +0 -0
  365. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/__init__.py +0 -0
  366. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/__openai_patcher.py +0 -0
  367. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/__init__.py +0 -0
  368. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  369. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/target_fn.py +0 -0
  370. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_lite_management_client.py +0 -0
  371. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/e2etests/test_prompty_async.py +0 -0
  372. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_built_in_evaluator.py +0 -0
  373. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  374. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
  375. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_eval_run.py +0 -0
  376. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluate_performance.py +0 -0
  377. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  378. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  379. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  380. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  381. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_simulator.py +0 -0
  382. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  383. {azure_ai_evaluation-1.8.0 → azure_ai_evaluation-1.9.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
@@ -1,5 +1,19 @@
1
1
  # Release History
2
2
 
3
+ ## 1.9.0 (2025-07-02)
4
+
5
+ ### Features Added
6
+
7
+ - Added support for Azure OpenAI evaluation via the `AzureOpenAIScoreModelGrader` class, which serves as a wrapper around Azure OpenAI score model configurations. This new grader object can be supplied to the main `evaluate` method as if it were a normal callable evaluator.
8
+ - Added new experimental risk categories ProtectedMaterial and CodeVulnerability for redteam agent scan.
9
+
10
+
11
+ ### Bugs Fixed
12
+
13
+ - Significant improvements to IntentResolution evaluator. New version has less variance, is nearly 2x faster and consumes fewer tokens.
14
+ - Fixed MeteorScoreEvaluator and other threshold-based evaluators returning incorrect binary results due to integer conversion of decimal scores. Previously, decimal scores like 0.9375 were incorrectly converted to integers (0) before threshold comparison, causing them to fail even when above the threshold. [#41415](https://github.com/Azure/azure-sdk-for-python/issues/41415)
15
+ - Added a new enum `ADVERSARIAL_QA_DOCUMENTS`, which moves all the "file_content" type prompts out of `ADVERSARIAL_QA` and into the new enum.
16
+
3
17
  ## 1.8.0 (2025-05-29)
4
18
 
5
19
  ### Features Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.8.0
3
+ Version: 1.9.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -400,6 +400,20 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
400
400
 
401
401
  # Release History
402
402
 
403
+ ## 1.9.0 (2025-07-02)
404
+
405
+ ### Features Added
406
+
407
+ - Added support for Azure OpenAI evaluation via the `AzureOpenAIScoreModelGrader` class, which serves as a wrapper around Azure OpenAI score model configurations. This new grader object can be supplied to the main `evaluate` method as if it were a normal callable evaluator.
408
+ - Added new experimental risk categories ProtectedMaterial and CodeVulnerability for redteam agent scan.
409
+
410
+
411
+ ### Bugs Fixed
412
+
413
+ - Significant improvements to IntentResolution evaluator. New version has less variance, is nearly 2x faster and consumes fewer tokens.
414
+ - Fixed MeteorScoreEvaluator and other threshold-based evaluators returning incorrect binary results due to integer conversion of decimal scores. Previously, decimal scores like 0.9375 were incorrectly converted to integers (0) before threshold comparison, causing them to fail even when above the threshold. [#41415](https://github.com/Azure/azure-sdk-for-python/issues/41415)
415
+ - Added a new enum `ADVERSARIAL_QA_DOCUMENTS`, which moves all the "file_content" type prompts out of `ADVERSARIAL_QA` and into the new enum.
416
+
403
417
  ## 1.8.0 (2025-05-29)
404
418
 
405
419
  ### Features Added
@@ -45,6 +45,7 @@ from ._aoai.aoai_grader import AzureOpenAIGrader
45
45
  from ._aoai.label_grader import AzureOpenAILabelGrader
46
46
  from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
47
47
  from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
48
+ from ._aoai.score_model_grader import AzureOpenAIScoreModelGrader
48
49
 
49
50
 
50
51
  _patch_all = []
@@ -54,10 +55,19 @@ _patch_all = []
54
55
  # in ai.projects. So we only import it if it's available and the user has ai.projects.
55
56
  try:
56
57
  from ._converters._ai_services import AIAgentConverter
58
+
57
59
  _patch_all.append("AIAgentConverter")
58
60
  except ImportError:
59
- print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
61
+ print(
62
+ "[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`."
63
+ )
64
+
65
+ try:
66
+ from ._converters._sk_services import SKAgentConverter
60
67
 
68
+ _patch_all.append("SKAgentConverter")
69
+ except ImportError:
70
+ print("[INFO] Could not import SKAgentConverter. Please install the dependency with `pip install semantic-kernel`.")
61
71
 
62
72
  __all__ = [
63
73
  "evaluate",
@@ -99,6 +109,7 @@ __all__ = [
99
109
  "AzureOpenAILabelGrader",
100
110
  "AzureOpenAIStringCheckGrader",
101
111
  "AzureOpenAITextSimilarityGrader",
112
+ "AzureOpenAIScoreModelGrader",
102
113
  ]
103
114
 
104
- __all__.extend([p for p in _patch_all if p not in __all__])
115
+ __all__.extend([p for p in _patch_all if p not in __all__])
@@ -7,4 +7,4 @@ from .aoai_grader import AzureOpenAIGrader
7
7
 
8
8
  __all__ = [
9
9
  "AzureOpenAIGrader",
10
- ]
10
+ ]
@@ -5,12 +5,13 @@ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfigurat
5
5
 
6
6
  from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION
7
7
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
8
+ from azure.ai.evaluation._user_agent import UserAgentSingleton
8
9
  from typing import Any, Dict, Union
9
10
  from azure.ai.evaluation._common._experimental import experimental
10
11
 
11
12
 
12
13
  @experimental
13
- class AzureOpenAIGrader():
14
+ class AzureOpenAIGrader:
14
15
  """
15
16
  Base class for Azure OpenAI grader wrappers, recommended only for use by experienced OpenAI API users.
16
17
  Combines a model configuration and any grader configuration
@@ -35,9 +36,15 @@ class AzureOpenAIGrader():
35
36
 
36
37
  """
37
38
 
38
- id = "aoai://general"
39
+ id = "azureai://built-in/evaluators/azure-openai/custom_grader"
39
40
 
40
- def __init__(self, *, model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], grader_config: Dict[str, Any], **kwargs: Any):
41
+ def __init__(
42
+ self,
43
+ *,
44
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
45
+ grader_config: Dict[str, Any],
46
+ **kwargs: Any,
47
+ ):
41
48
  self._model_config = model_config
42
49
  self._grader_config = grader_config
43
50
 
@@ -45,8 +52,6 @@ class AzureOpenAIGrader():
45
52
  self._validate_model_config()
46
53
  self._validate_grader_config()
47
54
 
48
-
49
-
50
55
  def _validate_model_config(self) -> None:
51
56
  """Validate the model configuration that this grader wrapper is using."""
52
57
  if "api_key" not in self._model_config or not self._model_config.get("api_key"):
@@ -57,7 +62,7 @@ class AzureOpenAIGrader():
57
62
  category=ErrorCategory.INVALID_VALUE,
58
63
  target=ErrorTarget.AOAI_GRADER,
59
64
  )
60
-
65
+
61
66
  def _validate_grader_config(self) -> None:
62
67
  """Validate the grader configuration that this grader wrapper is using."""
63
68
 
@@ -71,19 +76,24 @@ class AzureOpenAIGrader():
71
76
  :return: The OpenAI client.
72
77
  :rtype: [~openai.OpenAI, ~openai.AzureOpenAI]
73
78
  """
79
+ default_headers = {"User-Agent": UserAgentSingleton().value}
74
80
  if "azure_endpoint" in self._model_config:
75
- from openai import AzureOpenAI
76
- # TODO set default values?
77
- return AzureOpenAI(
81
+ from openai import AzureOpenAI
82
+
83
+ # TODO set default values?
84
+ return AzureOpenAI(
78
85
  azure_endpoint=self._model_config["azure_endpoint"],
79
- api_key=self._model_config.get("api_key", None), # Default-style access to appease linters.
80
- api_version=DEFAULT_AOAI_API_VERSION, # Force a known working version
86
+ api_key=self._model_config.get("api_key", None), # Default-style access to appease linters.
87
+ api_version=DEFAULT_AOAI_API_VERSION, # Force a known working version
81
88
  azure_deployment=self._model_config.get("azure_deployment", ""),
89
+ default_headers=default_headers,
82
90
  )
83
91
  from openai import OpenAI
92
+
84
93
  # TODO add default values for base_url and organization?
85
94
  return OpenAI(
86
95
  api_key=self._model_config["api_key"],
87
96
  base_url=self._model_config.get("base_url", ""),
88
97
  organization=self._model_config.get("organization", ""),
98
+ default_headers=default_headers,
89
99
  )
@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
9
9
 
10
10
  from .aoai_grader import AzureOpenAIGrader
11
11
 
12
+
12
13
  @experimental
13
14
  class AzureOpenAILabelGrader(AzureOpenAIGrader):
14
15
  """
@@ -42,12 +43,12 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
42
43
 
43
44
  """
44
45
 
45
- id = "aoai://label_model"
46
+ id = "azureai://built-in/evaluators/azure-openai/label_grader"
46
47
 
47
48
  def __init__(
48
49
  self,
49
50
  *,
50
- model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
51
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
51
52
  input: List[Dict[str, str]],
52
53
  labels: List[str],
53
54
  model: str,
@@ -0,0 +1,90 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Union, List, Optional
5
+
6
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
+ from openai.types.graders import ScoreModelGrader
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+
10
+ from .aoai_grader import AzureOpenAIGrader
11
+
12
+
13
+ @experimental
14
+ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
15
+ """
16
+ Wrapper class for OpenAI's score model graders.
17
+
18
+ Enables continuous scoring evaluation with custom prompts and flexible
19
+ conversation-style inputs. Supports configurable score ranges and
20
+ pass thresholds for binary classification.
21
+
22
+ Supplying a ScoreModelGrader to the `evaluate` method will cause an
23
+ asynchronous request to evaluate the grader via the OpenAI API. The
24
+ results of the evaluation will then be merged into the standard
25
+ evaluation results.
26
+
27
+ :param model_config: The model configuration to use for the grader.
28
+ :type model_config: Union[
29
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
+ ~azure.ai.evaluation.OpenAIModelConfiguration
31
+ ]
32
+ :param input: The input messages for the grader. List of conversation
33
+ messages with role and content.
34
+ :type input: List[Dict[str, str]]
35
+ :param model: The model to use for the evaluation.
36
+ :type model: str
37
+ :param name: The name of the grader.
38
+ :type name: str
39
+ :param range: The range of the score. Defaults to [0, 1].
40
+ :type range: Optional[List[float]]
41
+ :param pass_threshold: Score threshold for pass/fail classification.
42
+ Defaults to midpoint of range.
43
+ :type pass_threshold: Optional[float]
44
+ :param sampling_params: The sampling parameters for the model.
45
+ :type sampling_params: Optional[Dict[str, Any]]
46
+ :param kwargs: Additional keyword arguments to pass to the grader.
47
+ :type kwargs: Any
48
+ """
49
+
50
+ id = "azureai://built-in/evaluators/azure-openai/score_model_grader"
51
+
52
+ def __init__(
53
+ self,
54
+ *,
55
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
56
+ input: List[Dict[str, str]],
57
+ model: str,
58
+ name: str,
59
+ range: Optional[List[float]] = None,
60
+ pass_threshold: Optional[float] = None,
61
+ sampling_params: Optional[Dict[str, Any]] = None,
62
+ **kwargs: Any,
63
+ ):
64
+ # Validate range and pass_threshold
65
+ if range is not None:
66
+ if len(range) != 2 or range[0] >= range[1]:
67
+ raise ValueError("range must be a list of two numbers [min, max] where min < max")
68
+ else:
69
+ range = [0.0, 1.0] # Default range
70
+
71
+ if pass_threshold is not None:
72
+ if range and (pass_threshold < range[0] or pass_threshold > range[1]):
73
+ raise ValueError(f"pass_threshold {pass_threshold} must be within range {range}")
74
+ else:
75
+ pass_threshold = (range[0] + range[1]) / 2 # Default to midpoint
76
+
77
+ # Store pass_threshold as instance attribute
78
+ self.pass_threshold = pass_threshold
79
+
80
+ # Create OpenAI ScoreModelGrader instance
81
+ grader_kwargs = {"input": input, "model": model, "name": name, "type": "score_model"}
82
+
83
+ if range is not None:
84
+ grader_kwargs["range"] = range
85
+ if sampling_params is not None:
86
+ grader_kwargs["sampling_params"] = sampling_params
87
+
88
+ grader = ScoreModelGrader(**grader_kwargs)
89
+
90
+ super().__init__(model_config=model_config, grader_config=grader, **kwargs)
@@ -10,6 +10,7 @@ from azure.ai.evaluation._common._experimental import experimental
10
10
 
11
11
  from .aoai_grader import AzureOpenAIGrader
12
12
 
13
+
13
14
  @experimental
14
15
  class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
15
16
  """
@@ -38,12 +39,12 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
38
39
 
39
40
  """
40
41
 
41
- id = "aoai://string_check"
42
+ id = "azureai://built-in/evaluators/azure-openai/string_check_grader"
42
43
 
43
44
  def __init__(
44
45
  self,
45
46
  *,
46
- model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
47
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
47
48
  input: str,
48
49
  name: str,
49
50
  operation: Literal[
@@ -10,6 +10,7 @@ from azure.ai.evaluation._common._experimental import experimental
10
10
 
11
11
  from .aoai_grader import AzureOpenAIGrader
12
12
 
13
+
13
14
  @experimental
14
15
  class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
15
16
  """
@@ -52,12 +53,12 @@ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
52
53
 
53
54
  """
54
55
 
55
- id = "aoai://text_similarity"
56
+ id = "azureai://built-in/evaluators/azure-openai/text_similarity_grader"
56
57
 
57
58
  def __init__(
58
59
  self,
59
60
  *,
60
- model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
61
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
61
62
  evaluation_metric: Literal[
62
63
  "fuzzy_match",
63
64
  "bleu",
@@ -19,6 +19,7 @@ from azure.core.pipeline.policies import ProxyPolicy, AsyncRetryPolicy
19
19
 
20
20
  class AzureEnvironmentMetadata(TypedDict):
21
21
  """Configuration for various Azure environments. All endpoints include a trailing slash."""
22
+
22
23
  portal_endpoint: str
23
24
  """The management portal for the Azure environment (e.g. https://portal.azure.com/)"""
24
25
  resource_manager_endpoint: str
@@ -107,15 +108,15 @@ class AzureEnvironmentClient:
107
108
 
108
109
  def case_insensitive_match(d: Mapping[str, Any], key: str) -> Optional[Any]:
109
110
  key = key.strip().lower()
110
- return next((v for k,v in d.items() if k.strip().lower() == key), None)
111
+ return next((v for k, v in d.items() if k.strip().lower() == key), None)
111
112
 
112
113
  async with _ASYNC_LOCK:
113
114
  cloud = _KNOWN_AZURE_ENVIRONMENTS.get(name) or case_insensitive_match(_KNOWN_AZURE_ENVIRONMENTS, name)
114
115
  if cloud:
115
116
  return cloud
116
- default_endpoint = (_KNOWN_AZURE_ENVIRONMENTS
117
- .get(_DEFAULT_AZURE_ENV_NAME, {})
118
- .get("resource_manager_endpoint"))
117
+ default_endpoint = _KNOWN_AZURE_ENVIRONMENTS.get(_DEFAULT_AZURE_ENV_NAME, {}).get(
118
+ "resource_manager_endpoint"
119
+ )
119
120
 
120
121
  metadata_url = self.get_default_metadata_url(default_endpoint)
121
122
  clouds = await self.get_clouds_async(metadata_url=metadata_url, update_cached=update_cached)
@@ -124,10 +125,7 @@ class AzureEnvironmentClient:
124
125
  return cloud_metadata
125
126
 
126
127
  async def get_clouds_async(
127
- self,
128
- *,
129
- metadata_url: Optional[str] = None,
130
- update_cached: bool = True
128
+ self, *, metadata_url: Optional[str] = None, update_cached: bool = True
131
129
  ) -> Mapping[str, AzureEnvironmentMetadata]:
132
130
  metadata_url = metadata_url or self.get_default_metadata_url()
133
131
 
@@ -149,7 +147,8 @@ class AzureEnvironmentClient:
149
147
  default_endpoint = default_endpoint or "https://management.azure.com/"
150
148
  metadata_url = os.getenv(
151
149
  _ENV_ARM_CLOUD_METADATA_URL,
152
- f"{default_endpoint}metadata/endpoints?api-version={AzureEnvironmentClient.DEFAULT_API_VERSION}")
150
+ f"{default_endpoint}metadata/endpoints?api-version={AzureEnvironmentClient.DEFAULT_API_VERSION}",
151
+ )
153
152
  return metadata_url
154
153
 
155
154
  @staticmethod
@@ -197,7 +196,7 @@ class AzureEnvironmentClient:
197
196
 
198
197
  def recursive_update(d: Dict, u: Mapping) -> None:
199
198
  """Recursively update a dictionary.
200
-
199
+
201
200
  :param Dict d: The dictionary to update.
202
201
  :param Mapping u: The mapping to update from.
203
202
  """
@@ -73,7 +73,13 @@ class AzureMLTokenManager(APITokenManager):
73
73
  return super().get_aad_credential()
74
74
 
75
75
  def get_token(
76
- self, scopes = None, claims: Union[str, None] = None, tenant_id: Union[str, None] = None, enable_cae: bool = False, **kwargs: Any) -> AccessToken:
76
+ self,
77
+ scopes=None,
78
+ claims: Union[str, None] = None,
79
+ tenant_id: Union[str, None] = None,
80
+ enable_cae: bool = False,
81
+ **kwargs: Any
82
+ ) -> AccessToken:
77
83
  """Get the API token. If the token is not available or has expired, refresh the token.
78
84
 
79
85
  :return: API token
@@ -5,8 +5,17 @@ from enum import Enum
5
5
 
6
6
  from azure.core import CaseInsensitiveEnumMeta
7
7
 
8
- PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency", "intent_resolution",
9
- "tool_call_accurate", "response_completeness", "task_adherence"]
8
+ PROMPT_BASED_REASON_EVALUATORS = [
9
+ "coherence",
10
+ "relevance",
11
+ "retrieval",
12
+ "groundedness",
13
+ "fluency",
14
+ "intent_resolution",
15
+ "tool_call_accurate",
16
+ "response_completeness",
17
+ "task_adherence",
18
+ ]
10
19
 
11
20
 
12
21
  class CommonConstants:
@@ -6,13 +6,22 @@ import logging
6
6
  from typing import Union, Any, Dict
7
7
  from azure.core.credentials import AzureKeyCredential, TokenCredential
8
8
  from azure.ai.evaluation._common.onedp import AIProjectClient as RestEvaluationServiceClient
9
- from azure.ai.evaluation._common.onedp.models import (PendingUploadRequest, PendingUploadType, EvaluationResult,
10
- ResultType, AssetCredentialRequest, EvaluationUpload, InputDataset, RedTeamUpload)
9
+ from azure.ai.evaluation._common.onedp.models import (
10
+ PendingUploadRequest,
11
+ PendingUploadType,
12
+ EvaluationResult,
13
+ ResultType,
14
+ AssetCredentialRequest,
15
+ EvaluationUpload,
16
+ InputDataset,
17
+ RedTeamUpload,
18
+ )
11
19
  from azure.storage.blob import ContainerClient
12
20
  from .utils import upload
13
21
 
14
22
  LOGGER = logging.getLogger(__name__)
15
23
 
24
+
16
25
  class EvaluationServiceOneDPClient:
17
26
 
18
27
  def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
@@ -23,7 +32,15 @@ class EvaluationServiceOneDPClient:
23
32
  )
24
33
 
25
34
  def create_evaluation_result(
26
- self, *, name: str, path: str, version=1, metrics: Dict[str, int]=None, result_type: ResultType=ResultType.EVALUATION, **kwargs) -> EvaluationResult:
35
+ self,
36
+ *,
37
+ name: str,
38
+ path: str,
39
+ version=1,
40
+ metrics: Dict[str, int] = None,
41
+ result_type: ResultType = ResultType.EVALUATION,
42
+ **kwargs,
43
+ ) -> EvaluationResult:
27
44
  """Create and upload evaluation results to Azure evaluation service.
28
45
 
29
46
  This method uploads evaluation results from a local path to Azure Blob Storage
@@ -49,17 +66,20 @@ class EvaluationServiceOneDPClient:
49
66
  :raises: Various exceptions from the underlying API calls or upload process
50
67
  """
51
68
 
52
- LOGGER.debug(f"Creating evaluation result for {name} with version {version} type {result_type} from path {path}")
69
+ LOGGER.debug(
70
+ f"Creating evaluation result for {name} with version {version} type {result_type} from path {path}"
71
+ )
53
72
  start_pending_upload_response = self.rest_client.evaluation_results.start_pending_upload(
54
73
  name=name,
55
74
  version=version,
56
75
  body=PendingUploadRequest(pending_upload_type=PendingUploadType.TEMPORARY_BLOB_REFERENCE),
57
- **kwargs
76
+ **kwargs,
58
77
  )
59
78
 
60
79
  LOGGER.debug(f"Uploading {path} to {start_pending_upload_response.blob_reference_for_consumption.blob_uri}")
61
80
  with ContainerClient.from_container_url(
62
- start_pending_upload_response.blob_reference_for_consumption.credential.sas_uri) as container_client:
81
+ start_pending_upload_response.blob_reference_for_consumption.credential.sas_uri
82
+ ) as container_client:
63
83
  upload(path=path, container_client=container_client, logger=LOGGER)
64
84
 
65
85
  LOGGER.debug(f"Creating evaluation result version for {name} with version {version}")
@@ -73,7 +93,7 @@ class EvaluationServiceOneDPClient:
73
93
  ),
74
94
  name=name,
75
95
  version=version,
76
- **kwargs
96
+ **kwargs,
77
97
  )
78
98
 
79
99
  return create_version_response
@@ -90,10 +110,7 @@ class EvaluationServiceOneDPClient:
90
110
  :rtype: EvaluationUpload
91
111
  :raises: Various exceptions from the underlying API calls
92
112
  """
93
- upload_run_response = self.rest_client.evaluations.upload_run(
94
- evaluation=evaluation,
95
- **kwargs
96
- )
113
+ upload_run_response = self.rest_client.evaluations.upload_run(evaluation=evaluation, **kwargs)
97
114
 
98
115
  return upload_run_response
99
116
 
@@ -112,11 +129,7 @@ class EvaluationServiceOneDPClient:
112
129
  :rtype: EvaluationUpload
113
130
  :raises: Various exceptions from the underlying API calls
114
131
  """
115
- update_run_response = self.rest_client.evaluations.upload_update_run(
116
- name=name,
117
- evaluation=evaluation,
118
- **kwargs
119
- )
132
+ update_run_response = self.rest_client.evaluations.upload_update_run(name=name, evaluation=evaluation, **kwargs)
120
133
 
121
134
  return update_run_response
122
135
 
@@ -132,10 +145,7 @@ class EvaluationServiceOneDPClient:
132
145
  :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
133
146
  :raises: Various exceptions from the underlying API calls
134
147
  """
135
- upload_run_response = self.rest_client.red_teams.upload_run(
136
- redteam=red_team,
137
- **kwargs
138
- )
148
+ upload_run_response = self.rest_client.red_teams.upload_run(redteam=red_team, **kwargs)
139
149
 
140
150
  return upload_run_response
141
151
 
@@ -154,10 +164,6 @@ class EvaluationServiceOneDPClient:
154
164
  :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
155
165
  :raises: Various exceptions from the underlying API calls
156
166
  """
157
- update_run_response = self.rest_client.red_teams.upload_update_run(
158
- name=name,
159
- redteam=red_team,
160
- **kwargs
161
- )
167
+ update_run_response = self.rest_client.red_teams.upload_update_run(name=name, redteam=red_team, **kwargs)
162
168
 
163
- return update_run_response
169
+ return update_run_response
@@ -1,32 +1,32 @@
1
- # coding=utf-8
2
- # --------------------------------------------------------------------------
3
- # Copyright (c) Microsoft Corporation. All rights reserved.
4
- # Licensed under the MIT License. See License.txt in the project root for license information.
5
- # Code generated by Microsoft (R) Python Code Generator.
6
- # Changes may cause incorrect behavior and will be lost if the code is regenerated.
7
- # --------------------------------------------------------------------------
8
- # pylint: disable=wrong-import-position
9
-
10
- from typing import TYPE_CHECKING
11
-
12
- if TYPE_CHECKING:
13
- from ._patch import * # pylint: disable=unused-wildcard-import
14
-
15
- from ._client import AIProjectClient # type: ignore
16
- from ._version import VERSION
17
-
18
- __version__ = VERSION
19
-
20
- try:
21
- from ._patch import __all__ as _patch_all
22
- from ._patch import *
23
- except ImportError:
24
- _patch_all = []
25
- from ._patch import patch_sdk as _patch_sdk
26
-
27
- __all__ = [
28
- "AIProjectClient",
29
- ]
30
- __all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore
31
-
32
- _patch_sdk()
1
+ # coding=utf-8
2
+ # --------------------------------------------------------------------------
3
+ # Copyright (c) Microsoft Corporation. All rights reserved.
4
+ # Licensed under the MIT License. See License.txt in the project root for license information.
5
+ # Code generated by Microsoft (R) Python Code Generator.
6
+ # Changes may cause incorrect behavior and will be lost if the code is regenerated.
7
+ # --------------------------------------------------------------------------
8
+ # pylint: disable=wrong-import-position
9
+
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from ._patch import * # pylint: disable=unused-wildcard-import
14
+
15
+ from ._client import AIProjectClient # type: ignore
16
+ from ._version import VERSION
17
+
18
+ __version__ = VERSION
19
+
20
+ try:
21
+ from ._patch import __all__ as _patch_all
22
+ from ._patch import *
23
+ except ImportError:
24
+ _patch_all = []
25
+ from ._patch import patch_sdk as _patch_sdk
26
+
27
+ __all__ = [
28
+ "AIProjectClient",
29
+ ]
30
+ __all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore
31
+
32
+ _patch_sdk()