azure-ai-evaluation 1.9.0__tar.gz → 1.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Note: this version of azure-ai-evaluation has been flagged as potentially problematic; see the package's registry page for details.

Files changed (397)
  1. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/CHANGELOG.md +47 -0
  2. {azure_ai_evaluation-1.9.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.11.0}/PKG-INFO +63 -4
  3. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/TROUBLESHOOTING.md +0 -3
  4. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/__init__.py +46 -12
  5. azure_ai_evaluation-1.11.0/azure/ai/evaluation/_aoai/python_grader.py +84 -0
  6. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_aoai/score_model_grader.py +1 -0
  7. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/models/_models.py +5 -0
  8. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/rai_service.py +3 -3
  9. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/utils.py +74 -17
  10. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_converters/_ai_services.py +60 -10
  11. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_converters/_models.py +75 -26
  12. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +70 -22
  13. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_eval_run.py +14 -1
  14. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_evaluate.py +163 -44
  15. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +79 -33
  16. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_utils.py +5 -2
  17. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  18. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +8 -1
  19. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
  20. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +143 -25
  21. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +7 -2
  22. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +19 -9
  23. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -5
  24. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -1
  25. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -1
  26. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +5 -2
  27. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -1
  28. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +3 -0
  29. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -0
  30. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +1 -1
  31. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
  32. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  33. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +114 -4
  34. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +9 -3
  35. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -1
  36. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +8 -1
  37. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +1 -1
  38. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +56 -3
  39. azure_ai_evaluation-1.11.0/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +181 -0
  40. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +11 -3
  41. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +3 -2
  42. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
  43. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +2 -1
  44. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -2
  45. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +24 -12
  46. azure_ai_evaluation-1.11.0/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +405 -0
  47. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +214 -187
  48. azure_ai_evaluation-1.11.0/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  49. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +8 -1
  50. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -1
  51. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_exceptions.py +1 -0
  52. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
  53. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +115 -30
  54. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
  55. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
  56. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +28 -31
  57. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +2 -0
  58. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_version.py +1 -1
  59. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/__init__.py +4 -3
  60. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_attack_objective_generator.py +17 -0
  61. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_callback_chat_target.py +14 -1
  62. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_evaluation_processor.py +376 -0
  63. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_mlflow_integration.py +322 -0
  64. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_orchestrator_manager.py +661 -0
  65. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_red_team.py +1164 -0
  66. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_red_team_result.py +6 -0
  67. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_result_processor.py +610 -0
  68. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  69. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +11 -4
  70. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +6 -0
  71. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/constants.py +0 -2
  72. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  73. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  74. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +115 -13
  75. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/metric_mapping.py +24 -4
  76. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  77. azure_ai_evaluation-1.11.0/azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  78. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +17 -4
  79. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +14 -2
  80. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +13 -1
  81. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +21 -7
  82. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +24 -5
  83. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_simulator.py +12 -0
  84. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0/azure_ai_evaluation.egg-info}/PKG-INFO +63 -4
  85. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure_ai_evaluation.egg-info/SOURCES.txt +16 -0
  86. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure_ai_evaluation.egg-info/requires.txt +0 -2
  87. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +7 -4
  88. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/aoai_score_model_grader_sample.py +61 -7
  89. azure_ai_evaluation-1.11.0/samples/data/custom_objectives_with_context_example.json +51 -0
  90. azure_ai_evaluation-1.11.0/samples/evaluation_samples_common.py +128 -0
  91. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/evaluation_samples_evaluate.py +40 -27
  92. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/evaluation_samples_evaluate_fdp.py +7 -0
  93. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/evaluation_samples_threshold.py +16 -16
  94. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/red_team_samples.py +56 -0
  95. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/setup.py +0 -2
  96. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/conftest.py +59 -1
  97. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/converters/ai_agent_converter/serialization_helper.py +6 -1
  98. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +4 -4
  99. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_builtin_evaluators.py +54 -20
  100. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_evaluate.py +7 -7
  101. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_mass_evaluate.py +1 -1
  102. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_metrics_upload.py +4 -0
  103. azure_ai_evaluation-1.11.0/tests/e2etests/test_red_team.py +379 -0
  104. azure_ai_evaluation-1.11.0/tests/unittests/test_agent_evaluators.py +105 -0
  105. azure_ai_evaluation-1.11.0/tests/unittests/test_aoai_alignment_missing_rows.py +90 -0
  106. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_aoai_evaluation_pagination.py +13 -5
  107. azure_ai_evaluation-1.11.0/tests/unittests/test_aoai_python_grader.py +54 -0
  108. azure_ai_evaluation-1.11.0/tests/unittests/test_built_in_evaluator.py +254 -0
  109. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_eval_run.py +291 -1
  110. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluate.py +331 -12
  111. azure_ai_evaluation-1.11.0/tests/unittests/test_evaluate_mismatch.py +488 -0
  112. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluate_performance.py +2 -3
  113. azure_ai_evaluation-1.11.0/tests/unittests/test_lazy_imports.py +135 -0
  114. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +4 -0
  115. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_callback_chat_target.py +77 -1
  116. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +1 -1
  117. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_red_team.py +279 -171
  118. azure_ai_evaluation-1.11.0/tests/unittests/test_redteam/test_red_team_language_support.py +213 -0
  119. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_red_team_result.py +6 -1
  120. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_strategy_utils.py +61 -1
  121. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_safety_evaluation.py +48 -0
  122. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_save_eval.py +1 -0
  123. azure_ai_evaluation-1.11.0/tests/unittests/test_tool_call_accuracy_evaluator.py +686 -0
  124. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_utils.py +212 -1
  125. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -100
  126. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -117
  127. azure_ai_evaluation-1.9.0/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -71
  128. azure_ai_evaluation-1.9.0/azure/ai/evaluation/red_team/_red_team.py +0 -3174
  129. azure_ai_evaluation-1.9.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  130. azure_ai_evaluation-1.9.0/samples/evaluation_samples_common.py +0 -60
  131. azure_ai_evaluation-1.9.0/tests/unittests/test_agent_evaluators.py +0 -102
  132. azure_ai_evaluation-1.9.0/tests/unittests/test_built_in_evaluator.py +0 -130
  133. azure_ai_evaluation-1.9.0/tests/unittests/test_tool_call_accuracy_evaluator.py +0 -417
  134. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/MANIFEST.in +0 -0
  135. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/NOTICE.txt +0 -0
  136. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/README.md +0 -0
  137. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/__init__.py +0 -0
  138. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/__init__.py +0 -0
  139. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_aoai/__init__.py +0 -0
  140. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_aoai/aoai_grader.py +0 -0
  141. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_aoai/label_grader.py +0 -0
  142. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_aoai/string_check_grader.py +0 -0
  143. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_aoai/text_similarity_grader.py +0 -0
  144. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  145. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
  146. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_azure/_envs.py +0 -0
  147. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_azure/_models.py +0 -0
  148. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  149. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/__init__.py +0 -0
  150. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  151. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/constants.py +0 -0
  152. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/evaluation_onedp_client.py +0 -0
  153. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/math.py +0 -0
  154. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/__init__.py +0 -0
  155. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_client.py +0 -0
  156. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_configuration.py +0 -0
  157. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
  158. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_patch.py +0 -0
  159. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
  160. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
  161. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_utils/__init__.py +0 -0
  162. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_utils/model_base.py +0 -0
  163. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_utils/serialization.py +0 -0
  164. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_validation.py +0 -0
  165. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
  166. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/_version.py +0 -0
  167. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/__init__.py +0 -0
  168. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/_client.py +0 -0
  169. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +0 -0
  170. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/_patch.py +0 -0
  171. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +0 -0
  172. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +0 -0
  173. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +0 -0
  174. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/models/__init__.py +0 -0
  175. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/models/_enums.py +0 -0
  176. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/models/_patch.py +0 -0
  177. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/operations/__init__.py +0 -0
  178. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/operations/_operations.py +0 -0
  179. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -0
  180. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
  181. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
  182. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
  183. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
  184. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
  185. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
  186. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
  187. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
  188. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
  189. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
  190. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
  191. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
  192. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
  193. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
  194. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
  195. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
  196. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
  197. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
  198. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  199. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  200. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  201. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  202. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  203. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
  204. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  205. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  206. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  207. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  208. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  209. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  210. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  211. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  212. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  213. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  214. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  215. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  216. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
  217. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  218. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
  219. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_constants.py +0 -0
  220. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_converters/__init__.py +0 -0
  221. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_converters/_sk_services.py +0 -0
  222. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_eval_mapping.py +0 -0
  223. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  224. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  225. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  226. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  227. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  228. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  229. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  230. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  231. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  232. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  233. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  234. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  235. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  236. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  237. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
  238. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  239. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  240. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +0 -0
  241. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  242. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  243. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  244. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  245. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  246. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  247. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  248. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  249. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  250. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
  251. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  252. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  253. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  254. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  255. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  256. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
  257. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  258. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  259. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  260. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  261. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  262. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  263. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  264. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  265. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  266. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  267. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_http_utils.py +0 -0
  268. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/__init__.py +0 -0
  269. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
  270. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
  271. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
  272. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
  273. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
  274. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
  275. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
  276. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
  277. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
  278. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
  279. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
  280. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
  281. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  282. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  283. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
  284. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  285. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
  286. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
  287. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
  288. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  289. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_common/__init__.py +0 -0
  290. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +0 -0
  291. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
  292. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +0 -0
  293. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  294. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
  295. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
  296. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
  297. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
  298. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  299. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  300. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
  301. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  302. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_user_agent.py +0 -0
  303. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_vendor/__init__.py +0 -0
  304. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  305. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  306. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  307. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  308. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  309. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/py.typed +0 -0
  310. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_agent/__init__.py +0 -0
  311. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +0 -0
  312. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +0 -0
  313. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +0 -0
  314. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +0 -0
  315. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_attack_strategy.py +0 -0
  316. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
  317. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +0 -0
  318. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
  319. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  320. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  321. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  322. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
  323. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
  324. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  325. {azure_ai_evaluation-1.9.0/azure/ai/evaluation/red_team/_utils → azure_ai_evaluation-1.11.0/azure/ai/evaluation/simulator/_data_sources}/__init__.py +0 -0
  326. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  327. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  328. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  329. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  330. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  331. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  332. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  333. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  334. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
  335. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  336. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  337. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  338. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  339. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  340. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  341. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  342. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  343. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/migration_guide.md +0 -0
  344. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/pyproject.toml +0 -0
  345. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/README.md +0 -0
  346. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
  347. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/instructions.md +0 -0
  348. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  349. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/response_completeness.ipynb +0 -0
  350. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  351. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/task_adherence.ipynb +0 -0
  352. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/agent_evaluators/user_functions.py +0 -0
  353. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/data/evaluate_test_data.jsonl +0 -0
  354. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/evaluation_samples_safety_evaluation.py +0 -0
  355. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/evaluation_samples_simulate.py +0 -0
  356. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/red_team_agent_tool_sample.py +0 -0
  357. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/red_team_skip_upload.py +0 -0
  358. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/samples/semantic_kernel_red_team_agent_sample.py +0 -0
  359. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/setup.cfg +0 -0
  360. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/__init__.py +0 -0
  361. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/__openai_patcher.py +0 -0
  362. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
  363. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +0 -0
  364. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +0 -0
  365. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/__init__.py +0 -0
  366. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  367. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/target_fn.py +0 -0
  368. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_adv_simulator.py +0 -0
  369. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_aoai_graders.py +0 -0
  370. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_lite_management_client.py +0 -0
  371. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_prompty_async.py +0 -0
  372. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_remote_evaluation.py +0 -0
  373. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/e2etests/test_sim_and_eval.py +0 -0
  374. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_aoai_integration_features.py +0 -0
  375. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_aoai_score_model_grader.py +0 -0
  376. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_batch_run_context.py +0 -0
  377. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_completeness_evaluator.py +0 -0
  378. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  379. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
  380. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_document_retrieval_evaluator.py +0 -0
  381. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  382. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
  383. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  384. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
  385. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
  386. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  387. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  388. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/__init__.py +0 -0
  389. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_attack_strategy.py +0 -0
  390. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_constants.py +0 -0
  391. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_formatting_utils.py +0 -0
  392. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_rai_service_target.py +0 -0
  393. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +0 -0
  394. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_remote_evaluation_features.py +0 -0
  395. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_simulator.py +0 -0
  396. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  397. {azure_ai_evaluation-1.9.0 → azure_ai_evaluation-1.11.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
@@ -1,5 +1,49 @@
1
1
  # Release History
2
2
 
3
+ ## 1.11.0 (2025-09-02)
4
+
5
+ ### Features Added
6
+ - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
7
+ - Added support for user-supplied TokenCredentials with LLM based evaluators.
8
+ - Enhanced `GroundednessEvaluator` to support AI agent evaluation with tool calls. The evaluator now accepts agent response data containing tool calls and can extract context from `file_search` tool results for groundedness assessment. This enables evaluation of AI agents that use tools to retrieve information and generate responses. Note: Agent groundedness evaluation is currently supported only when the `file_search` tool is used.
9
+ - Added `language` parameter to `RedTeam` class for multilingual red team scanning support. The parameter accepts values from `SupportedLanguages` enum including English, Spanish, French, German, Italian, Portuguese, Japanese, Korean, and Simplified Chinese, enabling red team attacks to be generated and conducted in multiple languages.
10
+ - Added support for IndirectAttack and UngroundedAttributes risk categories in `RedTeam` scanning. These new risk categories expand red team capabilities to detect cross-platform indirect attacks and evaluate ungrounded inferences about human attributes including emotional state and protected class information.
11
+
12
+ ### Bugs Fixed
13
+ - Fixed issue where evaluation results were not properly aligned with input data, leading to incorrect metrics being reported.
14
+
15
+ ### Other Changes
16
+ - Deprecating `AdversarialSimulator` in favor of the [AI Red Teaming Agent](https://aka.ms/airedteamingagent-sample). `AdversarialSimulator` will be removed in the next minor release.
17
+ - Moved retry configuration constants (`MAX_RETRY_ATTEMPTS`, `MAX_RETRY_WAIT_SECONDS`, `MIN_RETRY_WAIT_SECONDS`) from `RedTeam` class to new `RetryManager` class for better code organization and configurability.
18
+
19
+ ## 1.10.0 (2025-07-31)
20
+
21
+ ### Breaking Changes
22
+
23
+ - Added `evaluate_query` parameter to all RAI service evaluators that can be passed as a keyword argument. This parameter controls whether queries are included in evaluation data when evaluating query-response pairs. Previously, queries were always included in evaluations. When set to `True`, both query and response will be evaluated; when set to `False` (default), only the response will be evaluated. This parameter is available across all RAI service evaluators including `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator`, `ProtectedMaterialEvaluator`, `IndirectAttackEvaluator`, `CodeVulnerabilityEvaluator`, `UngroundedAttributesEvaluator`, `GroundednessProEvaluator`, and `EciEvaluator`. Existing code that relies on queries being evaluated will need to explicitly set `evaluate_query=True` to maintain the previous behavior.
24
+
25
+ ### Features Added
26
+
27
+ - Added support for Azure OpenAI Python grader via `AzureOpenAIPythonGrader` class, which serves as a wrapper around Azure OpenAI Python grader configurations. This new grader object can be supplied to the main `evaluate` method as if it were a normal callable evaluator.
28
+ - Added `attack_success_thresholds` parameter to `RedTeam` class for configuring custom thresholds that determine attack success. This allows users to set specific threshold values for each risk category, with scores greater than the threshold considered successful attacks (i.e. higher threshold means higher
29
+ tolerance for harmful responses).
30
+ - Enhanced threshold reporting in RedTeam results to include default threshold values when custom thresholds aren't specified, providing better transparency about the evaluation criteria used.
31
+
32
+
33
+ ### Bugs Fixed
34
+
35
+ - Fixed red team scan `output_path` issue where individual evaluation results were overwriting each other instead of being preserved as separate files. Individual evaluations now create unique files while the user's `output_path` is reserved for final aggregated results.
36
+ - Significant improvements to TaskAdherence evaluator. New version has less variance, is much faster and consumes fewer tokens.
37
+ - Significant improvements to Relevance evaluator. New version has more concrete rubrics and has less variance, is much faster and consumes fewer tokens.
38
+
39
+
40
+ ### Other Changes
41
+
42
+ - The default engine for evaluation was changed from `promptflow` (PFClient) to an in-SDK batch client (RunSubmitterClient)
43
+ - Note: We've temporarily kept an escape hatch to fall back to the legacy `promptflow` implementation by setting `_use_pf_client=True` when invoking `evaluate()`.
44
+ This is due to be removed in a future release.
45
+
46
+
3
47
  ## 1.9.0 (2025-07-02)
4
48
 
5
49
  ### Features Added
@@ -11,8 +55,11 @@
11
55
  ### Bugs Fixed
12
56
 
13
57
  - Significant improvements to IntentResolution evaluator. New version has less variance, is nearly 2x faster and consumes fewer tokens.
58
+
59
+ - Fixes and improvements to ToolCallAccuracy evaluator. New version has less variance, and now works on all tool calls that happen in a turn at once. Previously, it worked on each tool call independently without having context on the other tool calls that happen in the same turn, and then aggregated the results to a score in the range [0-1]. The score range is now [1-5].
14
60
  - Fixed MeteorScoreEvaluator and other threshold-based evaluators returning incorrect binary results due to integer conversion of decimal scores. Previously, decimal scores like 0.9375 were incorrectly converted to integers (0) before threshold comparison, causing them to fail even when above the threshold. [#41415](https://github.com/Azure/azure-sdk-for-python/issues/41415)
15
61
  - Added a new enum `ADVERSARIAL_QA_DOCUMENTS` which moves all the "file_content" type prompts away from `ADVERSARIAL_QA` to the new enum
62
+ - `AzureOpenAIScoreModelGrader` evaluator now supports `pass_threshold` parameter to set the minimum score required for a response to be considered passing. This allows users to define custom thresholds for evaluation results, enhancing flexibility in grading AI model responses.
16
63
 
17
64
  ## 1.8.0 (2025-05-29)
18
65
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: azure-ai-evaluation
3
- Version: 1.9.0
3
+ Version: 1.11.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -21,8 +21,6 @@ Classifier: Operating System :: OS Independent
21
21
  Requires-Python: >=3.9
22
22
  Description-Content-Type: text/markdown
23
23
  License-File: NOTICE.txt
24
- Requires-Dist: promptflow-devkit>=1.17.1
25
- Requires-Dist: promptflow-core>=1.17.1
26
24
  Requires-Dist: pyjwt>=2.8.0
27
25
  Requires-Dist: azure-identity>=1.16.0
28
26
  Requires-Dist: azure-core>=1.30.2
@@ -37,6 +35,20 @@ Requires-Dist: Jinja2>=3.1.6
37
35
  Requires-Dist: aiohttp>=3.0
38
36
  Provides-Extra: redteam
39
37
  Requires-Dist: pyrit==0.8.1; extra == "redteam"
38
+ Dynamic: author
39
+ Dynamic: author-email
40
+ Dynamic: classifier
41
+ Dynamic: description
42
+ Dynamic: description-content-type
43
+ Dynamic: home-page
44
+ Dynamic: keywords
45
+ Dynamic: license
46
+ Dynamic: license-file
47
+ Dynamic: project-url
48
+ Dynamic: provides-extra
49
+ Dynamic: requires-dist
50
+ Dynamic: requires-python
51
+ Dynamic: summary
40
52
 
41
53
  # Azure AI Evaluation client library for Python
42
54
 
@@ -400,6 +412,50 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
400
412
 
401
413
  # Release History
402
414
 
415
+ ## 1.11.0 (2025-09-02)
416
+
417
+ ### Features Added
418
+ - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
419
+ - Added support for user-supplied TokenCredentials with LLM based evaluators.
420
+ - Enhanced `GroundednessEvaluator` to support AI agent evaluation with tool calls. The evaluator now accepts agent response data containing tool calls and can extract context from `file_search` tool results for groundedness assessment. This enables evaluation of AI agents that use tools to retrieve information and generate responses. Note: Agent groundedness evaluation is currently supported only when the `file_search` tool is used.
421
+ - Added `language` parameter to `RedTeam` class for multilingual red team scanning support. The parameter accepts values from `SupportedLanguages` enum including English, Spanish, French, German, Italian, Portuguese, Japanese, Korean, and Simplified Chinese, enabling red team attacks to be generated and conducted in multiple languages.
422
+ - Added support for IndirectAttack and UngroundedAttributes risk categories in `RedTeam` scanning. These new risk categories expand red team capabilities to detect cross-platform indirect attacks and evaluate ungrounded inferences about human attributes including emotional state and protected class information.
423
+
424
+ ### Bugs Fixed
425
+ - Fixed issue where evaluation results were not properly aligned with input data, leading to incorrect metrics being reported.
426
+
427
+ ### Other Changes
428
+ - Deprecating `AdversarialSimulator` in favor of the [AI Red Teaming Agent](https://aka.ms/airedteamingagent-sample). `AdversarialSimulator` will be removed in the next minor release.
429
+ - Moved retry configuration constants (`MAX_RETRY_ATTEMPTS`, `MAX_RETRY_WAIT_SECONDS`, `MIN_RETRY_WAIT_SECONDS`) from `RedTeam` class to new `RetryManager` class for better code organization and configurability.
430
+
431
+ ## 1.10.0 (2025-07-31)
432
+
433
+ ### Breaking Changes
434
+
435
+ - Added `evaluate_query` parameter to all RAI service evaluators that can be passed as a keyword argument. This parameter controls whether queries are included in evaluation data when evaluating query-response pairs. Previously, queries were always included in evaluations. When set to `True`, both query and response will be evaluated; when set to `False` (default), only the response will be evaluated. This parameter is available across all RAI service evaluators including `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator`, `ProtectedMaterialEvaluator`, `IndirectAttackEvaluator`, `CodeVulnerabilityEvaluator`, `UngroundedAttributesEvaluator`, `GroundednessProEvaluator`, and `EciEvaluator`. Existing code that relies on queries being evaluated will need to explicitly set `evaluate_query=True` to maintain the previous behavior.
436
+
437
+ ### Features Added
438
+
439
+ - Added support for Azure OpenAI Python grader via `AzureOpenAIPythonGrader` class, which serves as a wrapper around Azure OpenAI Python grader configurations. This new grader object can be supplied to the main `evaluate` method as if it were a normal callable evaluator.
440
+ - Added `attack_success_thresholds` parameter to `RedTeam` class for configuring custom thresholds that determine attack success. This allows users to set specific threshold values for each risk category, with scores greater than the threshold considered successful attacks (i.e. higher threshold means higher
441
+ tolerance for harmful responses).
442
+ - Enhanced threshold reporting in RedTeam results to include default threshold values when custom thresholds aren't specified, providing better transparency about the evaluation criteria used.
443
+
444
+
445
+ ### Bugs Fixed
446
+
447
+ - Fixed red team scan `output_path` issue where individual evaluation results were overwriting each other instead of being preserved as separate files. Individual evaluations now create unique files while the user's `output_path` is reserved for final aggregated results.
448
+ - Significant improvements to TaskAdherence evaluator. New version has less variance, is much faster and consumes fewer tokens.
449
+ - Significant improvements to Relevance evaluator. New version has more concrete rubrics and has less variance, is much faster and consumes fewer tokens.
450
+
451
+
452
+ ### Other Changes
453
+
454
+ - The default engine for evaluation was changed from `promptflow` (PFClient) to an in-SDK batch client (RunSubmitterClient)
455
+ - Note: We've temporarily kept an escape hatch to fall back to the legacy `promptflow` implementation by setting `_use_pf_client=True` when invoking `evaluate()`.
456
+ This is due to be removed in a future release.
457
+
458
+
403
459
  ## 1.9.0 (2025-07-02)
404
460
 
405
461
  ### Features Added
@@ -411,8 +467,11 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
411
467
  ### Bugs Fixed
412
468
 
413
469
  - Significant improvements to IntentResolution evaluator. New version has less variance, is nearly 2x faster and consumes fewer tokens.
470
+
471
+ - Fixes and improvements to ToolCallAccuracy evaluator. New version has less variance, and now works on all tool calls that happen in a turn at once. Previously, it worked on each tool call independently without having context on the other tool calls that happen in the same turn, and then aggregated the results to a score in the range [0-1]. The score range is now [1-5].
414
472
  - Fixed MeteorScoreEvaluator and other threshold-based evaluators returning incorrect binary results due to integer conversion of decimal scores. Previously, decimal scores like 0.9375 were incorrectly converted to integers (0) before threshold comparison, causing them to fail even when above the threshold. [#41415](https://github.com/Azure/azure-sdk-for-python/issues/41415)
415
473
  - Added a new enum `ADVERSARIAL_QA_DOCUMENTS` which moves all the "file_content" type prompts away from `ADVERSARIAL_QA` to the new enum
474
+ - `AzureOpenAIScoreModelGrader` evaluator now supports `pass_threshold` parameter to set the minimum score required for a response to be considered passing. This allows users to define custom thresholds for evaluation results, enhancing flexibility in grading AI model responses.
416
475
 
417
476
  ## 1.8.0 (2025-05-29)
418
477
 
@@ -46,9 +46,6 @@ This guide walks you through how to investigate failures, common errors in the `
46
46
  - Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
47
47
  - If you encounter a 403 Unauthorized error when using safety evaluators, verify that you have the `Contributor` role assigned to your Azure AI project. `Contributor` role is currently required to run safety evaluations.
48
48
 
49
- ### Troubleshoot Quality Evaluator Issues
50
- - For `ToolCallAccuracyEvaluator`, if your input did not have a tool to evaluate, the current behavior is to output `null`.
51
-
52
49
  ## Handle Simulation Errors
53
50
 
54
51
  ### Adversarial Simulation Supported Regions
@@ -46,6 +46,7 @@ from ._aoai.label_grader import AzureOpenAILabelGrader
46
46
  from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
47
47
  from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
48
48
  from ._aoai.score_model_grader import AzureOpenAIScoreModelGrader
49
+ from ._aoai.python_grader import AzureOpenAIPythonGrader
49
50
 
50
51
 
51
52
  _patch_all = []
@@ -53,21 +54,46 @@ _patch_all = []
53
54
  # The converter from the AI service to the evaluator schema requires a dependency on
54
55
  # ai.projects, but we also don't want to force users installing ai.evaluations to pull
55
56
  # in ai.projects. So we only import it if it's available and the user has ai.projects.
56
- try:
57
- from ._converters._ai_services import AIAgentConverter
57
+ # We use lazy loading to avoid printing messages during import unless the classes are actually used.
58
+ _lazy_imports = {}
58
59
 
59
- _patch_all.append("AIAgentConverter")
60
- except ImportError:
61
- print(
62
- "[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`."
63
- )
64
60
 
65
- try:
66
- from ._converters._sk_services import SKAgentConverter
61
+ def _create_lazy_import(class_name, module_path, dependency_name):
62
+ """Create a lazy import function for optional dependencies.
67
63
 
68
- _patch_all.append("SKAgentConverter")
69
- except ImportError:
70
- print("[INFO] Could not import SKAgentConverter. Please install the dependency with `pip install semantic-kernel`.")
64
+ Args:
65
+ class_name: Name of the class to import
66
+ module_path: Module path to import from
67
+ dependency_name: Name of the dependency package for error message
68
+
69
+ Returns:
70
+ A function that performs the lazy import when called
71
+ """
72
+
73
+ def lazy_import():
74
+ try:
75
+ module = __import__(module_path, fromlist=[class_name])
76
+ cls = getattr(module, class_name)
77
+ _patch_all.append(class_name)
78
+ return cls
79
+ except ImportError:
80
+ raise ImportError(
81
+ f"Could not import {class_name}. Please install the dependency with `pip install {dependency_name}`."
82
+ )
83
+
84
+ return lazy_import
85
+
86
+
87
+ _lazy_imports["AIAgentConverter"] = _create_lazy_import(
88
+ "AIAgentConverter",
89
+ "azure.ai.evaluation._converters._ai_services",
90
+ "azure-ai-projects",
91
+ )
92
+ _lazy_imports["SKAgentConverter"] = _create_lazy_import(
93
+ "SKAgentConverter",
94
+ "azure.ai.evaluation._converters._sk_services",
95
+ "semantic-kernel",
96
+ )
71
97
 
72
98
  __all__ = [
73
99
  "evaluate",
@@ -110,6 +136,14 @@ __all__ = [
110
136
  "AzureOpenAIStringCheckGrader",
111
137
  "AzureOpenAITextSimilarityGrader",
112
138
  "AzureOpenAIScoreModelGrader",
139
+ "AzureOpenAIPythonGrader",
113
140
  ]
114
141
 
115
142
  __all__.extend([p for p in _patch_all if p not in __all__])
143
+
144
+
145
def __getattr__(name):
    """Handle lazy imports for optional dependencies (PEP 562 module hook).

    Only invoked when *name* is not found through normal module lookup.

    :param name: Attribute name being resolved on the module.
    :return: The lazily imported class for registered names.
    :raises AttributeError: If *name* is not a registered lazy import.
    """
    if name in _lazy_imports:
        value = _lazy_imports[name]()
        # Cache on the module so subsequent accesses bypass this hook
        # (and the underlying import machinery) entirely.
        globals()[name] = value
        return value
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
@@ -0,0 +1,84 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from typing import Any, Dict, Union, Optional
5
+
6
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
+ from openai.types.graders import PythonGrader
8
+ from azure.ai.evaluation._common._experimental import experimental
9
+
10
+ from .aoai_grader import AzureOpenAIGrader
11
+
12
+
13
+ @experimental
14
+ class AzureOpenAIPythonGrader(AzureOpenAIGrader):
15
+ """
16
+ Wrapper class for OpenAI's Python code graders.
17
+
18
+ Enables custom Python-based evaluation logic with flexible scoring and
19
+ pass/fail thresholds. The grader executes user-provided Python code
20
+ to evaluate outputs against custom criteria.
21
+
22
+ Supplying a PythonGrader to the `evaluate` method will cause an
23
+ asynchronous request to evaluate the grader via the OpenAI API. The
24
+ results of the evaluation will then be merged into the standard
25
+ evaluation results.
26
+
27
+ :param model_config: The model configuration to use for the grader.
28
+ :type model_config: Union[
29
+ ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
+ ~azure.ai.evaluation.OpenAIModelConfiguration
31
+ ]
32
+ :param name: The name of the grader.
33
+ :type name: str
34
+ :param image_tag: The image tag for the Python execution environment.
35
+ :type image_tag: str
36
+ :param pass_threshold: Score threshold for pass/fail classification.
37
+ Scores >= threshold are considered passing.
38
+ :type pass_threshold: float
39
+ :param source: Python source code containing the grade function.
40
+ Must define: def grade(sample: dict, item: dict) -> float
41
+ :type source: str
42
+ :param kwargs: Additional keyword arguments to pass to the grader.
43
+ :type kwargs: Any
44
+
45
+
46
+ .. admonition:: Example:
47
+
48
+ .. literalinclude:: ../samples/evaluation_samples_common.py
49
+ :start-after: [START python_grader_example]
50
+ :end-before: [END python_grader_example]
51
+ :language: python
52
+ :dedent: 8
53
+ :caption: Using AzureOpenAIPythonGrader for custom evaluation logic.
54
+ """
55
+
56
+ id = "azureai://built-in/evaluators/azure-openai/python_grader"
57
+
58
+ def __init__(
59
+ self,
60
+ *,
61
+ model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
62
+ name: str,
63
+ image_tag: str,
64
+ pass_threshold: float,
65
+ source: str,
66
+ **kwargs: Any,
67
+ ):
68
+ # Validate pass_threshold
69
+ if not 0.0 <= pass_threshold <= 1.0:
70
+ raise ValueError("pass_threshold must be between 0.0 and 1.0")
71
+
72
+ # Store pass_threshold as instance attribute for potential future use
73
+ self.pass_threshold = pass_threshold
74
+
75
+ # Create OpenAI PythonGrader instance
76
+ grader = PythonGrader(
77
+ name=name,
78
+ image_tag=image_tag,
79
+ pass_threshold=pass_threshold,
80
+ source=source,
81
+ type="python",
82
+ )
83
+
84
+ super().__init__(model_config=model_config, grader_config=grader, **kwargs)
@@ -84,6 +84,7 @@ class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
84
84
  grader_kwargs["range"] = range
85
85
  if sampling_params is not None:
86
86
  grader_kwargs["sampling_params"] = sampling_params
87
+ grader_kwargs["pass_threshold"] = self.pass_threshold
87
88
 
88
89
  grader = ScoreModelGrader(**grader_kwargs)
89
90
 
@@ -1961,12 +1961,16 @@ class Message(_Model):
1961
1961
  :vartype role: str
1962
1962
  :ivar content: The content.
1963
1963
  :vartype content: str
1964
+ :ivar context: The context.
1965
+ :vartype context: str
1964
1966
  """
1965
1967
 
1966
1968
  role: Optional[str] = rest_field(name="Role", visibility=["read", "create", "update", "delete", "query"])
1967
1969
  """The role."""
1968
1970
  content: Optional[str] = rest_field(name="Content", visibility=["read", "create", "update", "delete", "query"])
1969
1971
  """The content."""
1972
+ context: Optional[str] = rest_field(name="Context", visibility=["read", "create", "update", "delete", "query"])
1973
+ """The context."""
1970
1974
 
1971
1975
  @overload
1972
1976
  def __init__(
@@ -1974,6 +1978,7 @@ class Message(_Model):
1974
1978
  *,
1975
1979
  role: Optional[str] = None,
1976
1980
  content: Optional[str] = None,
1981
+ context: Optional[str] = None,
1977
1982
  ) -> None: ...
1978
1983
 
1979
1984
  @overload
@@ -290,7 +290,7 @@ async def submit_request_onedp(
290
290
  payload = generate_payload(normalized_user_text, metric, annotation_task=annotation_task)
291
291
  headers = get_common_headers(token, evaluator_name)
292
292
  if scan_session_id:
293
- headers["client_request_id"] = scan_session_id
293
+ headers["x-ms-client-request-id"] = scan_session_id
294
294
  response = client.evaluations.submit_annotation(payload, headers=headers)
295
295
  result = json.loads(response)
296
296
  operation_id = result["location"].split("/")[-1]
@@ -319,8 +319,8 @@ async def fetch_result(operation_id: str, rai_svc_url: str, credential: TokenCre
319
319
  token = await fetch_or_reuse_token(credential, token)
320
320
  headers = get_common_headers(token)
321
321
 
322
- async with get_async_http_client_with_timeout() as client:
323
- response = await client.get(url, headers=headers)
322
+ async with get_async_http_client() as client:
323
+ response = await client.get(url, headers=headers, timeout=RAIService.TIMEOUT)
324
324
 
325
325
  if response.status_code == 200:
326
326
  return response.json()
@@ -6,11 +6,11 @@ import posixpath
6
6
  import re
7
7
  import math
8
8
  import threading
9
- from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin
9
+ from typing import Any, List, Literal, Mapping, Optional, Type, TypeVar, Tuple, Union, cast, get_args, get_origin
10
10
 
11
11
  import nltk
12
12
  from azure.storage.blob import ContainerClient
13
- from typing_extensions import NotRequired, Required, TypeGuard
13
+ from typing_extensions import NotRequired, Required, TypeGuard, TypeIs
14
14
  from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
15
15
  from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
16
16
  from azure.ai.evaluation._exceptions import ErrorMessage, ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
@@ -127,17 +127,15 @@ def construct_prompty_model_config(
127
127
  return prompty_model_config
128
128
 
129
129
 
130
def is_onedp_project(azure_ai_project: Optional[Union[str, AzureAIProject]]) -> TypeIs[str]:
    """Determine whether the given project scope is a OneDP project.

    A OneDP project is supplied as a plain endpoint string, whereas a classic
    project is supplied as a structured ``AzureAIProject`` mapping, so the
    check reduces to a string type test.

    :param azure_ai_project: The scope of the Azure AI project.
    :type azure_ai_project: Optional[Union[str,~azure.ai.evaluation.AzureAIProject]]
    :return: True if the Azure AI project is an OneDP project, False otherwise.
    :rtype: bool
    """
    return isinstance(azure_ai_project, str)
141
139
 
142
140
 
143
141
  def validate_azure_ai_project(o: object) -> AzureAIProject:
@@ -494,14 +492,17 @@ def _extract_text_from_content(content):
494
492
  return text
495
493
 
496
494
 
497
- def _get_conversation_history(query):
495
+ def _get_conversation_history(query, include_system_messages=False):
498
496
  all_user_queries = []
499
497
  cur_user_query = []
500
498
  all_agent_responses = []
501
499
  cur_agent_response = []
500
+ system_message = None
502
501
  for msg in query:
503
502
  if not "role" in msg:
504
503
  continue
504
+ if include_system_messages and msg["role"] == "system" and "content" in msg:
505
+ system_message = msg.get("content", "")
505
506
  if msg["role"] == "user" and "content" in msg:
506
507
  if cur_agent_response != []:
507
508
  all_agent_responses.append(cur_agent_response)
@@ -530,13 +531,18 @@ def _get_conversation_history(query):
530
531
  category=ErrorCategory.INVALID_VALUE,
531
532
  blame=ErrorBlame.USER_ERROR,
532
533
  )
533
-
534
- return {"user_queries": all_user_queries, "agent_responses": all_agent_responses}
534
+ result = {"user_queries": all_user_queries, "agent_responses": all_agent_responses}
535
+ if include_system_messages:
536
+ result["system_message"] = system_message
537
+ return result
535
538
 
536
539
 
537
540
  def _pretty_format_conversation_history(conversation_history):
538
541
  """Formats the conversation history for better readability."""
539
542
  formatted_history = ""
543
+ if "system_message" in conversation_history and conversation_history["system_message"] is not None:
544
+ formatted_history += "SYSTEM_PROMPT:\n"
545
+ formatted_history += " " + conversation_history["system_message"] + "\n\n"
540
546
  for i, (user_query, agent_response) in enumerate(
541
547
  zip(conversation_history["user_queries"], conversation_history["agent_responses"] + [None])
542
548
  ):
@@ -552,10 +558,10 @@ def _pretty_format_conversation_history(conversation_history):
552
558
  return formatted_history
553
559
 
554
560
 
555
- def reformat_conversation_history(query, logger=None):
561
+ def reformat_conversation_history(query, logger=None, include_system_messages=False):
556
562
  """Reformats the conversation history to a more compact representation."""
557
563
  try:
558
- conversation_history = _get_conversation_history(query)
564
+ conversation_history = _get_conversation_history(query, include_system_messages=include_system_messages)
559
565
  return _pretty_format_conversation_history(conversation_history)
560
566
  except:
561
567
  # If the conversation history cannot be parsed for whatever reason (e.g. the converter format changed), the original query is returned
@@ -570,22 +576,53 @@ def reformat_conversation_history(query, logger=None):
570
576
  return query
571
577
 
572
578
 
573
- def _get_agent_response(agent_response_msgs):
574
- """Extracts the text from the agent response content."""
579
def _get_agent_response(agent_response_msgs, include_tool_messages=False):
    """Extract a formatted agent response: text plus, optionally, tool calls/results.

    :param agent_response_msgs: Sequence of message mappings (assistant/tool roles).
    :param include_tool_messages: When True, interleave "[TOOL_CALL] ..." and
        "[TOOL_RESULT] ..." lines with the assistant text.
    :return: List of formatted response strings, in message order.
    """
    agent_response_text = []
    tool_results = {}

    # First pass: collect tool results keyed by tool_call_id so each one can be
    # emitted immediately after the call that produced it.
    if include_tool_messages:
        for msg in agent_response_msgs:
            if not isinstance(msg, dict):
                # Defensive: skip malformed entries instead of raising.
                continue
            if msg.get("role") == "tool" and "tool_call_id" in msg:
                for content in msg.get("content", []):
                    if isinstance(content, dict) and content.get("type") == "tool_result":
                        result = content.get("tool_result")
                        tool_results[msg["tool_call_id"]] = f"[TOOL_RESULT] {result}"

    # Second pass: parse assistant messages and tool calls
    for msg in agent_response_msgs:
        if not isinstance(msg, dict):
            continue
        if "role" in msg and msg.get("role") == "assistant" and "content" in msg:
            text = _extract_text_from_content(msg["content"])
            if text:
                agent_response_text.extend(text)
            if include_tool_messages:
                for content in msg.get("content", []):
                    if not isinstance(content, dict):
                        continue
                    # Todo: Verify if this is the correct way to handle tool calls
                    if content.get("type") == "tool_call":
                        # Two shapes are supported: a nested {"tool_call": {"function": ...}}
                        # payload, or a flat {"tool_call_id", "name", "arguments"} payload.
                        if "tool_call" in content and "function" in content.get("tool_call", {}):
                            tc = content.get("tool_call", {})
                            func_name = tc.get("function", {}).get("name", "")
                            args = tc.get("function", {}).get("arguments", {})
                            tool_call_id = tc.get("id")
                        else:
                            tool_call_id = content.get("tool_call_id")
                            func_name = content.get("name", "")
                            args = content.get("arguments", {})
                        # NOTE(review): arguments are assumed to be a mapping; some
                        # producers emit a raw JSON string instead -- TODO confirm
                        # upstream shape. Fall back to no args rather than crash.
                        if not isinstance(args, dict):
                            args = {}
                        args_str = ", ".join(f'{k}="{v}"' for k, v in args.items())
                        call_line = f"[TOOL_CALL] {func_name}({args_str})"
                        agent_response_text.append(call_line)
                        if tool_call_id in tool_results:
                            agent_response_text.append(tool_results[tool_call_id])

    return agent_response_text
582
619
 
583
620
 
584
- def reformat_agent_response(response, logger=None):
621
+ def reformat_agent_response(response, logger=None, include_tool_messages=False):
585
622
  try:
586
623
  if response is None or response == []:
587
624
  return ""
588
- agent_response = _get_agent_response(response)
625
+ agent_response = _get_agent_response(response, include_tool_messages=include_tool_messages)
589
626
  if agent_response == []:
590
627
  # If no message could be extracted, likely the format changed, fallback to the original response in that case
591
628
  if logger:
@@ -602,6 +639,26 @@ def reformat_agent_response(response, logger=None):
602
639
  return response
603
640
 
604
641
 
642
def reformat_tool_definitions(tool_definitions, logger=None):
    """Render tool definitions as a compact, human-readable summary.

    Produces one "- name: description (inputs: param1, param2)" line per tool,
    under a "TOOL_DEFINITIONS:" header.

    :param tool_definitions: Iterable of tool definition mappings.
    :param logger: Optional logger used to report parse failures.
    :return: Formatted summary string on success; the original
        ``tool_definitions`` unchanged if they cannot be parsed.
    """
    try:
        output_lines = ["TOOL_DEFINITIONS:"]
        for tool in tool_definitions:
            name = tool.get("name", "unnamed_tool")
            desc = tool.get("description", "").strip()
            params = tool.get("parameters", {}).get("properties", {})
            param_names = ", ".join(params.keys()) if params else "no parameters"
            output_lines.append(f"- {name}: {desc} (inputs: {param_names})")
        return "\n".join(output_lines)
    except Exception:
        # If the tool definitions cannot be parsed for whatever reason, the original tool definitions are returned
        # This is a fallback to ensure that the evaluation can still proceed. See comments on reformat_conversation_history for more details.
        if logger:
            logger.warning(
                f"Tool definitions could not be parsed, falling back to original definitions: {tool_definitions}"
            )
        return tool_definitions
660
+
661
+
605
662
  def upload(path: str, container_client: ContainerClient, logger=None):
606
663
  """Upload files or directories to Azure Blob Storage using a container client.
607
664