azure-ai-evaluation 1.4.0__tar.gz → 1.6.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (358)
  1. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/CHANGELOG.md +28 -0
  2. {azure_ai_evaluation-1.4.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.6.0}/PKG-INFO +36 -2
  3. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/TROUBLESHOOTING.md +39 -1
  4. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/__init__.py +9 -16
  5. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_aoai/__init__.py +10 -0
  6. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  7. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_aoai/label_grader.py +66 -0
  8. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  9. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  10. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_azure/_clients.py +4 -4
  11. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_azure/_envs.py +208 -0
  12. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_azure/_token_manager.py +12 -7
  13. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/__init__.py +5 -0
  14. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
  15. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  16. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_client.py +139 -0
  17. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  18. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  19. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  20. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  21. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_types.py +21 -0
  22. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  23. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  24. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_common/raiclient → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp}/_version.py +9 -9
  25. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  26. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  27. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  28. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  29. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  30. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  31. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  32. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  33. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  34. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  35. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  36. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  37. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  38. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  39. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  40. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  41. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  42. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  43. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  44. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  45. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  46. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  47. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  48. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  49. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  50. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  51. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  52. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  53. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  54. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  55. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  56. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/rai_service.py +159 -29
  57. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  58. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  59. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/utils.py +80 -2
  60. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_constants.py +16 -0
  61. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_converters/_ai_services.py +4 -4
  62. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_eval_mapping.py +71 -0
  63. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  64. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  65. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +17 -4
  66. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  67. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  68. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
  69. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_evaluate.py +372 -105
  70. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
  71. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +95 -0
  72. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_utils.py +120 -7
  73. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +9 -4
  74. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
  75. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  76. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  77. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  78. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
  79. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
  80. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +2 -2
  81. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
  82. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
  83. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +8 -2
  84. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  85. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
  86. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
  87. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
  88. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_exceptions.py +2 -0
  89. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  90. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  91. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  92. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  93. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  94. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  95. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  96. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  97. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  98. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  99. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  100. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  101. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  102. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +129 -0
  103. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +7 -1
  104. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  105. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  106. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
  107. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  108. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  109. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  110. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  111. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  112. azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  113. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  114. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  115. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
  116. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_version.py +1 -1
  117. azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team/__init__.py +19 -0
  118. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_attack_objective_generator.py +3 -0
  119. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_attack_strategy.py +4 -1
  120. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_red_team.py +885 -481
  121. azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  122. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_utils/constants.py +2 -1
  123. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_utils/formatting_utils.py +23 -22
  124. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_utils/logging_utils.py +1 -1
  125. azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  126. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_utils/strategy_utils.py +9 -5
  127. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  128. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_constants.py +1 -0
  129. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  130. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  131. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
  132. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  133. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
  134. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  135. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
  136. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  137. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  138. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
  139. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  140. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_simulator.py +1 -1
  141. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0/azure_ai_evaluation.egg-info}/PKG-INFO +36 -2
  142. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure_ai_evaluation.egg-info/SOURCES.txt +89 -16
  143. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure_ai_evaluation.egg-info/requires.txt +7 -1
  144. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/pyproject.toml +3 -3
  145. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/instructions.md +1 -1
  146. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/response_completeness.ipynb +27 -0
  147. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/task_adherence.ipynb +1 -1
  148. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/evaluation_samples_evaluate.py +53 -0
  149. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/evaluation_samples_threshold.py +59 -0
  150. azure_ai_evaluation-1.6.0/samples/red_team_agent_tool_sample.py +170 -0
  151. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/red_team_samples.py +30 -30
  152. azure_ai_evaluation-1.6.0/samples/semantic_kernel_red_team_agent_sample.py +98 -0
  153. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/setup.py +12 -1
  154. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/conftest.py +40 -38
  155. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +3 -3
  156. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_adv_simulator.py +23 -18
  157. azure_ai_evaluation-1.6.0/tests/e2etests/test_aoai_graders.py +198 -0
  158. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_builtin_evaluators.py +227 -87
  159. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_evaluate.py +2 -1
  160. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_lite_management_client.py +3 -3
  161. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_mass_evaluate.py +199 -72
  162. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_metrics_upload.py +10 -3
  163. azure_ai_evaluation-1.6.0/tests/e2etests/test_remote_evaluation.py +101 -0
  164. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_sim_and_eval.py +52 -56
  165. azure_ai_evaluation-1.6.0/tests/unittests/test_aoai_integration_features.py +168 -0
  166. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_batch_run_context.py +6 -3
  167. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_completeness_evaluator.py +29 -0
  168. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_content_safety_rai_script.py +2 -3
  169. azure_ai_evaluation-1.6.0/tests/unittests/test_document_retrieval_evaluator.py +229 -0
  170. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_eval_run.py +1 -0
  171. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluate.py +117 -10
  172. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +1 -1
  173. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_attack_strategy.py +1 -1
  174. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_callback_chat_target.py +2 -2
  175. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_constants.py +3 -3
  176. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_formatting_utils.py +13 -21
  177. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_red_team.py +218 -147
  178. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_red_team_result.py +36 -36
  179. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/test_strategy_utils.py +16 -12
  180. azure_ai_evaluation-1.6.0/tests/unittests/test_remote_evaluation_features.py +66 -0
  181. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_safety_evaluation.py +91 -0
  182. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_save_eval.py +13 -1
  183. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -179
  184. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -99
  185. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -23
  186. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -105
  187. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -59
  188. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
  189. azure_ai_evaluation-1.4.0/azure/ai/evaluation/simulator/_tracing.py +0 -89
  190. azure_ai_evaluation-1.4.0/tests/unittests/test_evaluate_telemetry.py +0 -168
  191. azure_ai_evaluation-1.4.0/tests/unittests/test_evaluators/apology_dag/apology.py +0 -8
  192. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/MANIFEST.in +0 -0
  193. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/NOTICE.txt +0 -0
  194. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/README.md +0 -0
  195. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/__init__.py +0 -0
  196. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/__init__.py +0 -0
  197. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  198. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_azure/_models.py +0 -0
  199. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  200. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/constants.py +0 -0
  201. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/math.py +0 -0
  202. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_common/raiclient → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_common/onedp}/py.typed +0 -0
  203. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
  204. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  205. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  206. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  207. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  208. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  209. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  210. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  211. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  212. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  213. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  214. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  215. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  216. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  217. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  218. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  219. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  220. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  221. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
  222. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  223. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_converters}/__init__.py +0 -0
  224. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_converters/_models.py +0 -0
  225. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_converters → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluate}/__init__.py +0 -0
  226. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  227. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  228. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluate → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_evaluators}/__init__.py +0 -0
  229. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  230. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
  231. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  232. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
  233. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  234. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
  235. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  236. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  237. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  238. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  239. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
  240. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
  241. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
  242. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
  243. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
  244. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  245. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  246. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  247. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
  248. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  249. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  250. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  251. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  252. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  253. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  254. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  255. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  256. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
  257. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  258. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  259. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  260. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  261. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  262. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
  263. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  264. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  265. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  266. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  267. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
  268. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  269. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  270. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
  271. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  272. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  273. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  274. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  275. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  276. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
  277. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  278. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
  279. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  280. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
  281. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  282. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  283. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_http_utils.py +0 -0
  284. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluators → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy}/__init__.py +0 -0
  285. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  286. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
  287. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  288. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  289. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  290. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_common}/__init__.py +0 -0
  291. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy/_batch_engine → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_legacy/_common}/_logging.py +0 -0
  292. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  293. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  294. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  295. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team/_utils → azure_ai_evaluation-1.6.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
  296. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  297. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_user_agent.py +0 -0
  298. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_vendor/__init__.py +0 -0
  299. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  300. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  301. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  302. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  303. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  304. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/py.typed +0 -0
  305. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_callback_chat_target.py +0 -0
  306. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team}/_default_converter.py +0 -0
  307. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_safety_evaluation → azure_ai_evaluation-1.6.0/azure/ai/evaluation/red_team/_utils}/__init__.py +0 -0
  308. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  309. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  310. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  311. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -0
  312. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  313. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  314. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  315. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  316. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  317. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  318. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  319. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  320. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  321. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  322. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  323. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/migration_guide.md +0 -0
  324. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/README.md +0 -0
  325. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
  326. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  327. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  328. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
  329. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/agent_evaluators/user_functions.py +0 -0
  330. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/data/evaluate_test_data.jsonl +0 -0
  331. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/evaluation_samples_common.py +0 -0
  332. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/evaluation_samples_safety_evaluation.py +0 -0
  333. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/samples/evaluation_samples_simulate.py +0 -0
  334. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/setup.cfg +0 -0
  335. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/__init__.py +0 -0
  336. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/__openai_patcher.py +0 -0
  337. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
  338. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
  339. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/__init__.py +0 -0
  340. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  341. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/target_fn.py +0 -0
  342. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/e2etests/test_prompty_async.py +0 -0
  343. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_agent_evaluators.py +0 -0
  344. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_built_in_evaluator.py +0 -0
  345. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  346. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluate_performance.py +0 -0
  347. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  348. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
  349. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  350. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
  351. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
  352. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  353. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  354. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_redteam/__init__.py +0 -0
  355. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_simulator.py +0 -0
  356. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  357. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  358. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.6.0}/tests/unittests/test_utils.py +0 -0
@@ -1,5 +1,33 @@
1
1
  # Release History
2
2
 
3
+ ## 1.6.0 (2025-05-07)
4
+
5
+ ### Features Added
6
+ - New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
7
+ - Added support for Azure Open AI evaluation via 4 new 'grader' classes, which serve as wrappers around Azure Open AI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
8
+ - AzureOpenAIGrader (general class for experienced users)
9
+ - AzureOpenAILabelGrader
10
+ - AzureOpenAIStringCheckGrader
11
+ - AzureOpenAITextSimilarityGrader
12
+
13
+ ### Breaking Changes
14
+ - In the experimental RedTeam's `scan` method, the `data_only` parameter has been replaced with `skip_evals`. If you do not want data to be uploaded, use the `skip_upload` flag.
15
+
16
+ ### Bugs Fixed
17
+ - Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
18
+ ```
19
+ "query1": "some query", "response": "some response"
20
+ ```
21
+ threw an error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
22
+ Now, users may import data containing fields with numeric characters.
23
+
24
+
25
+ ## 1.5.0 (2025-04-04)
26
+
27
+ ### Features Added
28
+
29
+ - New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
30
+
3
31
  ## 1.4.0 (2025-03-27)
4
32
 
5
33
  ### Features Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.4.0
3
+ Version: 1.6.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -28,8 +28,14 @@ Requires-Dist: azure-identity>=1.16.0
28
28
  Requires-Dist: azure-core>=1.30.2
29
29
  Requires-Dist: nltk>=3.9.1
30
30
  Requires-Dist: azure-storage-blob>=12.10.0
31
+ Requires-Dist: httpx>=0.25.1
32
+ Requires-Dist: pandas<3.0.0,>=2.1.2
33
+ Requires-Dist: openai>=1.73.0
34
+ Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
35
+ Requires-Dist: msrest>=0.6.21
36
+ Requires-Dist: Jinja2>=3.1.6
31
37
  Provides-Extra: redteam
32
- Requires-Dist: pyrit>=0.8.0; extra == "redteam"
38
+ Requires-Dist: pyrit==0.8.1; extra == "redteam"
33
39
 
34
40
  # Azure AI Evaluation client library for Python
35
41
 
@@ -376,6 +382,34 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
376
382
 
377
383
  # Release History
378
384
 
385
+ ## 1.6.0 (2025-05-07)
386
+
387
+ ### Features Added
388
+ - New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
389
+ - Added support for Azure Open AI evaluation via 4 new 'grader' classes, which serve as wrappers around Azure Open AI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
390
+ - AzureOpenAIGrader (general class for experienced users)
391
+ - AzureOpenAILabelGrader
392
+ - AzureOpenAIStringCheckGrader
393
+ - AzureOpenAITextSimilarityGrader
394
+
395
+ ### Breaking Changes
396
+ - In the experimental RedTeam's `scan` method, the `data_only` parameter has been replaced with `skip_evals`. If you do not want data to be uploaded, use the `skip_upload` flag.
397
+
398
+ ### Bugs Fixed
399
+ - Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
400
+ ```
401
+ "query1": "some query", "response": "some response"
402
+ ```
403
+ threw an error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
404
+ Now, users may import data containing fields with numeric characters.
405
+
406
+
407
+ ## 1.5.0 (2025-04-04)
408
+
409
+ ### Features Added
410
+
411
+ - New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
412
+
379
413
  ## 1.4.0 (2025-03-27)
380
414
 
381
415
  ### Features Added
@@ -6,11 +6,18 @@ This guide walks you through how to investigate failures, common errors in the `
6
6
 
7
7
  - [Handle Evaluate API Errors](#handle-evaluate-api-errors)
8
8
  - [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues)
9
+ - [Troubleshoot Column Mapping Issues](#troubleshoot-column-mapping-issues)
9
10
  - [Troubleshoot Safety Evaluator Issues](#troubleshoot-safety-evaluator-issues)
11
+ - [Troubleshoot Quality Evaluator Issues](#troubleshoot-quality-evaluator-issues)
10
12
  - [Handle Simulation Errors](#handle-simulation-errors)
11
13
  - [Adversarial Simulation Supported Regions](#adversarial-simulation-supported-regions)
14
+ - [Need to generate simulations for specific harm type](#need-to-generate-simulations-for-specific-harm-type)
15
+ - [Simulator is slow](#simulator-is-slow)
16
+ - [Handle RedTeam Errors](#handle-redteam-errors)
17
+ - [Target resource not found](#target-resource-not-found)
18
+ - [Insufficient Storage Permissions](#insufficient-storage-permissions)
12
19
  - [Logging](#logging)
13
- - [Get additional help](#get-additional-help)
20
+ - [Get Additional Help](#get-additional-help)
14
21
 
15
22
  ## Handle Evaluate API Errors
16
23
 
@@ -30,11 +37,18 @@ This guide walks you through how to investigate failures, common errors in the `
30
37
 
31
38
  - Additionally, if you're using a virtual network or private link, and your evaluation run upload fails because of that, check out this [guide](https://docs.microsoft.com/azure/machine-learning/how-to-enable-studio-virtual-network#access-data-using-the-studio).
32
39
 
40
+ ### Troubleshoot Column Mapping Issues
41
+
42
+ - When using `column_mapping` parameter in evaluators, ensure all keys and values are non-empty strings and contain only alphanumeric characters. Empty strings, non-string values, or non-alphanumeric characters can cause serialization errors and issues in downstream applications. Example of valid mapping: `{"query": "${data.query}", "response": "${data.response}"}`.
43
+
33
44
  ### Troubleshoot Safety Evaluator Issues
34
45
 
35
46
  - Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
36
47
  - If you encounter a 403 Unauthorized error when using safety evaluators, verify that you have the `Contributor` role assigned to your Azure AI project. `Contributor` role is currently required to run safety evaluations.
37
48
 
49
+ ### Troubleshoot Quality Evaluator Issues
50
+ - For `ToolCallAccuracyEvaluator`, if your input did not have a tool to evaluate, the current behavior is to output `null`.
51
+
38
52
  ## Handle Simulation Errors
39
53
 
40
54
  ### Adversarial Simulation Supported Regions
@@ -51,6 +65,30 @@ The Adversarial simulator does not support selecting individual harms, instead w
51
65
  Identify the type of simulations being run (adversarial or non-adversarial).
52
66
  Adjust parameters such as `api_call_retry_sleep_sec`, `api_call_delay_sec`, and `concurrent_async_task`. Please note that rate limits on LLM calls can apply to both tokens per minute and requests per minute.
53
67
 
68
+ ## Handle RedTeam Errors
69
+
70
+ ### Target resource not found
71
+ When initializing an Azure OpenAI model directly as `target` for a `RedTeam` scan, ensure `azure_endpoint` is specified in the format `https://<hub>.openai.azure.com/openai/deployments/<deployment_name>/chat/completions?api-version=2025-01-01-preview`. If using `AzureOpenAI`, `endpoint` should be specified in the format `https://<hub>.openai.azure.com/`.
72
+
73
+ ### Insufficient Storage Permissions
74
+ If you see an error like `WARNING: Failed to log artifacts to MLFlow: (UserError) Failed to upload evaluation run to the cloud due to insufficient permission to access the storage`, you need to ensure that proper permissions are assigned to the storage account linked to your Azure AI Project.
75
+
76
+ To fix this issue:
77
+ 1. Open the associated resource group being used in your Azure AI Project in the Azure Portal
78
+ 2. Look up the storage accounts associated with that resource group
79
+ 3. Open each storage account and click on "Access control (IAM)" on the left side navigation
80
+ 4. Add permissions for the desired users with the "Storage Blob Data Contributor" role
81
+
82
+ If you have Azure CLI, you can use the following command:
83
+
84
+ ```Shell
85
+ # <mySubscriptionID>: Subscription ID of the Azure AI Studio hub's linked storage account (available in Azure AI hub resource view in Azure Portal).
86
+ # <myResourceGroupName>: Resource group of the Azure AI Studio hub's linked storage account.
87
+ # <user-id>: User object ID for role assignment (retrieve with "az ad user show" command).
88
+
89
+ az role assignment create --role "Storage Blob Data Contributor" --scope /subscriptions/<mySubscriptionID>/resourceGroups/<myResourceGroupName> --assignee-principal-type User --assignee-object-id "<user-id>"
90
+ ```
91
+
54
92
  ## Logging
55
93
 
56
94
  You can set the logging level via the environment variable `PF_LOGGING_LEVEL`. Valid values include `CRITICAL`, `ERROR`, `WARNING`, `INFO`, and `DEBUG`; the default is `INFO`.
@@ -40,6 +40,11 @@ from ._model_configurations import (
40
40
  Message,
41
41
  OpenAIModelConfiguration,
42
42
  )
43
+ from ._aoai.aoai_grader import AzureOpenAIGrader
44
+ from ._aoai.label_grader import AzureOpenAILabelGrader
45
+ from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
46
+ from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
47
+
43
48
 
44
49
  _patch_all = []
45
50
 
@@ -52,22 +57,6 @@ try:
52
57
  except ImportError:
53
58
  print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
54
59
 
55
- # RedTeam requires a dependency on pyrit, but python 3.9 is not supported by pyrit.
56
- # So we only import it if it's available and the user has pyrit.
57
- try:
58
- from ._red_team._red_team import RedTeam
59
- from ._red_team._attack_strategy import AttackStrategy
60
- from ._red_team._attack_objective_generator import RiskCategory
61
- from ._red_team._red_team_result import RedTeamOutput
62
- _patch_all.extend([
63
- "RedTeam",
64
- "RedTeamOutput",
65
- "AttackStrategy",
66
- "RiskCategory",
67
- ])
68
- except ImportError:
69
- print("[INFO] Could not import RedTeam. Please install the dependency with `pip install azure-ai-evaluation[redteam]`.")
70
-
71
60
 
72
61
  __all__ = [
73
62
  "evaluate",
@@ -105,6 +94,10 @@ __all__ = [
105
94
  "CodeVulnerabilityEvaluator",
106
95
  "UngroundedAttributesEvaluator",
107
96
  "ToolCallAccuracyEvaluator",
97
+ "AzureOpenAIGrader",
98
+ "AzureOpenAILabelGrader",
99
+ "AzureOpenAIStringCheckGrader",
100
+ "AzureOpenAITextSimilarityGrader",
108
101
  ]
109
102
 
110
103
  __all__.extend([p for p in _patch_all if p not in __all__])
@@ -0,0 +1,10 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+
6
+ from .aoai_grader import AzureOpenAIGrader
7
+
8
+ __all__ = [
9
+ "AzureOpenAIGrader",
10
+ ]
@@ -0,0 +1,89 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
5
+
6
+ from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION
7
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
8
+ from typing import Any, Dict, Union
9
+ from azure.ai.evaluation._common._experimental import experimental
10
+
11
+
12
+ @experimental
13
+ class AzureOpenAIGrader():
14
+ """
15
+ Base class for Azure OpenAI grader wrappers, recommended only for use by experienced OpenAI API users.
16
+ Combines a model configuration and any grader configuration
17
+ into a singular object that can be used in evaluations.
18
+
19
+ Supplying an AzureOpenAIGrader to the `evaluate` method will cause an asynchronous request to evaluate
20
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
21
+ evaluation results.
22
+
23
+ :param model_config: The model configuration to use for the grader.
24
+ :type model_config: Union[
25
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
26
+ ~azure.ai.evaluation.OpenAIModelConfiguration
27
+ ]
28
+ :param grader_config: The grader configuration to use for the grader. This is expected
29
+ to be formatted as a dictionary that matches the specifications of the sub-types of
30
+ the TestingCriterion alias specified in [OpenAI's SDK](https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L151).
31
+ :type grader_config: Dict[str, Any]
32
+ :param kwargs: Additional keyword arguments to pass to the grader.
33
+ :type kwargs: Any
34
+
35
+
36
+ """
37
+
38
+ id = "aoai://general"
39
+
40
+ def __init__(self, *, model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], grader_config: Dict[str, Any], **kwargs: Any):
41
+ self._model_config = model_config
42
+ self._grader_config = grader_config
43
+
44
+ if kwargs.get("validate", True):
45
+ self._validate_model_config()
46
+ self._validate_grader_config()
47
+
48
+
49
+
50
+ def _validate_model_config(self) -> None:
51
+ """Validate the model configuration that this grader wrapper is using."""
52
+ if "api_key" not in self._model_config or not self._model_config.get("api_key"):
53
+ msg = f"{type(self).__name__}: Requires an api_key in the supplied model_config."
54
+ raise EvaluationException(
55
+ message=msg,
56
+ blame=ErrorBlame.USER_ERROR,
57
+ category=ErrorCategory.INVALID_VALUE,
58
+ target=ErrorTarget.AOAI_GRADER,
59
+ )
60
+
61
+ def _validate_grader_config(self) -> None:
62
+ """Validate the grader configuration that this grader wrapper is using."""
63
+
64
+ return
65
+
66
+ def get_client(self) -> Any:
67
+ """Construct an appropriate OpenAI client using this grader's model configuration.
68
+ Returns a slightly different client depending on whether or not this grader's model
69
+ configuration is for Azure OpenAI or OpenAI.
70
+
71
+ :return: The OpenAI client.
72
+ :rtype: Union[~openai.OpenAI, ~openai.AzureOpenAI]
73
+ """
74
+ if "azure_endpoint" in self._model_config:
75
+ from openai import AzureOpenAI
76
+ # TODO set default values?
77
+ return AzureOpenAI(
78
+ azure_endpoint=self._model_config["azure_endpoint"],
79
+ api_key=self._model_config.get("api_key", None), # Default-style access to appease linters.
80
+ api_version=self._model_config.get("api_version", DEFAULT_AOAI_API_VERSION),
81
+ azure_deployment=self._model_config.get("azure_deployment", ""),
82
+ )
83
+ from openai import OpenAI
84
+ # TODO add default values for base_url and organization?
85
+ return OpenAI(
86
+ api_key=self._model_config["api_key"],
87
+ base_url=self._model_config.get("base_url", ""),
88
+ organization=self._model_config.get("organization", ""),
89
+ )
@@ -0,0 +1,66 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Union, List
5
+
6
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
+ from openai.types.eval_create_params import TestingCriterionLabelModel
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+
10
+ from .aoai_grader import AzureOpenAIGrader
11
+
12
+ @experimental
13
+ class AzureOpenAILabelGrader(AzureOpenAIGrader):
14
+ """
15
+ Wrapper class for OpenAI's label model graders.
16
+
17
+ Supplying a LabelGrader to the `evaluate` method will cause an asynchronous request to evaluate
18
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
19
+ evaluation results.
20
+
21
+ :param model_config: The model configuration to use for the grader.
22
+ :type model_config: Union[
23
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
24
+ ~azure.ai.evaluation.OpenAIModelConfiguration
25
+ ]
26
+ :param input: The list of label-based testing criterion for this grader. Individual
27
+ values of this list are expected to be dictionaries that match the format of any of the valid
28
+ [TestingCriterionLabelModelInput](https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L125C1-L125C32)
29
+ subtypes.
30
+ :type input: List[Dict[str, str]]
31
+ :param labels: A list of strings representing the classification labels of this grader.
32
+ :type labels: List[str]
33
+ :param model: The model to use for the evaluation. Must support structured outputs.
34
+ :type model: str
35
+ :param name: The name of the grader.
36
+ :type name: str
37
+ :param passing_labels: The labels that indicate a passing result. Must be a subset of labels.
38
+ :type passing_labels: List[str]
39
+ :param kwargs: Additional keyword arguments to pass to the grader.
40
+ :type kwargs: Any
41
+
42
+
43
+ """
44
+
45
+ id = "aoai://label_model"
46
+
47
+ def __init__(
48
+ self,
49
+ *,
50
+ model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
51
+ input: List[Dict[str, str]],
52
+ labels: List[str],
53
+ model: str,
54
+ name: str,
55
+ passing_labels: List[str],
56
+ **kwargs: Any
57
+ ):
58
+ grader = TestingCriterionLabelModel(
59
+ input=input,
60
+ labels=labels,
61
+ model=model,
62
+ name=name,
63
+ passing_labels=passing_labels,
64
+ type="label_model",
65
+ )
66
+ super().__init__(model_config=model_config, grader_config=grader, **kwargs)
@@ -0,0 +1,65 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Union
5
+ from typing_extensions import Literal
6
+
7
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
8
+ from openai.types.eval_string_check_grader import EvalStringCheckGrader
9
+ from azure.ai.evaluation._common._experimental import experimental
10
+
11
+ from .aoai_grader import AzureOpenAIGrader
12
+
13
+ @experimental
14
+ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
15
+ """
16
+ Wrapper class for OpenAI's string check graders.
17
+
18
+ Supplying a StringCheckGrader to the `evaluate` method will cause an asynchronous request to evaluate
19
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
20
+ evaluation results.
21
+
22
+ :param model_config: The model configuration to use for the grader.
23
+ :type model_config: Union[
24
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
25
+ ~azure.ai.evaluation.OpenAIModelConfiguration
26
+ ]
27
+ :param input: The input text. This may include template strings.
28
+ :type input: str
29
+ :param name: The name of the grader.
30
+ :type name: str
31
+ :param operation: The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
32
+ :type operation: Literal["eq", "ne", "like", "ilike"]
33
+ :param reference: The reference text. This may include template strings.
34
+ :type reference: str
35
+ :param kwargs: Additional keyword arguments to pass to the grader.
36
+ :type kwargs: Any
37
+
38
+
39
+ """
40
+
41
+ id = "aoai://string_check"
42
+
43
+ def __init__(
44
+ self,
45
+ *,
46
+ model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
47
+ input: str,
48
+ name: str,
49
+ operation: Literal[
50
+ "eq",
51
+ "ne",
52
+ "like",
53
+ "ilike",
54
+ ],
55
+ reference: str,
56
+ **kwargs: Any
57
+ ):
58
+ grader = EvalStringCheckGrader(
59
+ input=input,
60
+ name=name,
61
+ operation=operation,
62
+ reference=reference,
63
+ type="string_check",
64
+ )
65
+ super().__init__(model_config=model_config, grader_config=grader, **kwargs)
@@ -0,0 +1,88 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Union
5
+ from typing_extensions import Literal
6
+
7
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
8
+ from openai.types.eval_text_similarity_grader import EvalTextSimilarityGrader
9
+ from azure.ai.evaluation._common._experimental import experimental
10
+
11
+ from .aoai_grader import AzureOpenAIGrader
12
+
13
+ @experimental
14
+ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
15
+ """
16
+ Wrapper class for OpenAI's text similarity graders.
17
+
18
+ Supplying a TextSimilarityGrader to the `evaluate` method will cause an asynchronous request to evaluate
19
+ the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
20
+ evaluation results.
21
+
22
+ :param model_config: The model configuration to use for the grader.
23
+ :type model_config: Union[
24
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
25
+ ~azure.ai.evaluation.OpenAIModelConfiguration
26
+ ]
27
+ :param evaluation_metric: The evaluation metric to use.
28
+ :type evaluation_metric: Literal[
29
+ "fuzzy_match",
30
+ "bleu",
31
+ "gleu",
32
+ "meteor",
33
+ "rouge_1",
34
+ "rouge_2",
35
+ "rouge_3",
36
+ "rouge_4",
37
+ "rouge_5",
38
+ "rouge_l",
39
+ "cosine",
40
+ ]
41
+ :param input: The text being graded.
42
+ :type input: str
43
+ :param pass_threshold: The passing threshold; a score greater than or equal to this value indicates a passing grade.
44
+ :type pass_threshold: float
45
+ :param reference: The text being graded against.
46
+ :type reference: str
47
+ :param name: The name of the grader.
48
+ :type name: str
49
+ :param kwargs: Additional keyword arguments to pass to the grader.
50
+ :type kwargs: Any
51
+
52
+
53
+ """
54
+
55
+ id = "aoai://text_similarity"
56
+
57
+ def __init__(
58
+ self,
59
+ *,
60
+ model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
61
+ evaluation_metric: Literal[
62
+ "fuzzy_match",
63
+ "bleu",
64
+ "gleu",
65
+ "meteor",
66
+ "rouge_1",
67
+ "rouge_2",
68
+ "rouge_3",
69
+ "rouge_4",
70
+ "rouge_5",
71
+ "rouge_l",
72
+ "cosine",
73
+ ],
74
+ input: str,
75
+ pass_threshold: float,
76
+ reference: str,
77
+ name: str,
78
+ **kwargs: Any
79
+ ):
80
+ grader = EvalTextSimilarityGrader(
81
+ evaluation_metric=evaluation_metric,
82
+ input=input,
83
+ pass_threshold=pass_threshold,
84
+ name=name,
85
+ reference=reference,
86
+ type="text_similarity",
87
+ )
88
+ super().__init__(model_config=model_config, grader_config=grader, **kwargs)
@@ -8,12 +8,12 @@ from threading import Lock
8
8
  from urllib.parse import quote
9
9
  from json.decoder import JSONDecodeError
10
10
 
11
- from azure.core.credentials import TokenCredential, AzureSasCredential
11
+ from azure.core.credentials import TokenCredential, AzureSasCredential, AccessToken
12
12
  from azure.core.rest import HttpResponse
13
13
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
14
14
  from azure.ai.evaluation._http_utils import HttpPipeline, get_http_client
15
15
  from azure.ai.evaluation._azure._token_manager import AzureMLTokenManager
16
- from azure.ai.evaluation.simulator._model_tools._identity_manager import TokenScope
16
+ from azure.ai.evaluation._constants import TokenScope
17
17
  from ._models import BlobStoreInfo, Workspace
18
18
 
19
19
 
@@ -61,7 +61,7 @@ class LiteMLClient:
61
61
  self._token_manager: Optional[AzureMLTokenManager] = None
62
62
  self._credential: Optional[TokenCredential] = credential
63
63
 
64
- def get_token(self) -> str:
64
+ def get_token(self) -> AccessToken:
65
65
  return self._get_token_manager().get_token()
66
66
 
67
67
  def get_credential(self) -> TokenCredential:
@@ -201,4 +201,4 @@ class LiteMLClient:
201
201
  return url
202
202
 
203
203
  def _get_headers(self) -> Dict[str, str]:
204
- return {"Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json"}
204
+ return {"Authorization": f"Bearer {self.get_token().token}", "Content-Type": "application/json"}