azure-ai-evaluation 1.10.0__tar.gz → 1.11.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (391)
  1. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/CHANGELOG.md +21 -0
  2. {azure_ai_evaluation-1.10.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.11.1}/PKG-INFO +39 -3
  3. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/models/_models.py +5 -0
  4. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_converters/_ai_services.py +60 -10
  5. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_converters/_models.py +75 -26
  6. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_eval_run.py +14 -1
  7. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_evaluate.py +13 -4
  8. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +104 -35
  9. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_utils.py +4 -0
  10. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +2 -1
  11. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +113 -19
  12. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +7 -2
  13. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +1 -1
  14. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -1
  15. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +113 -3
  16. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +8 -2
  17. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +2 -1
  18. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +10 -2
  19. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +2 -1
  20. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +2 -1
  21. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +8 -2
  22. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +104 -60
  23. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +58 -41
  24. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_exceptions.py +1 -0
  25. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_version.py +1 -1
  26. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/__init__.py +2 -1
  27. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_attack_objective_generator.py +17 -0
  28. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_callback_chat_target.py +14 -1
  29. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_evaluation_processor.py +376 -0
  30. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_mlflow_integration.py +322 -0
  31. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_orchestrator_manager.py +661 -0
  32. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_red_team.py +1164 -0
  33. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_result_processor.py +610 -0
  34. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  35. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +3 -1
  36. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +6 -0
  37. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  38. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  39. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +115 -13
  40. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/metric_mapping.py +24 -4
  41. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  42. azure_ai_evaluation-1.11.1/azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  43. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +17 -4
  44. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_adversarial_simulator.py +9 -0
  45. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +19 -5
  46. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +4 -3
  47. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1/azure_ai_evaluation.egg-info}/PKG-INFO +39 -3
  48. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure_ai_evaluation.egg-info/SOURCES.txt +12 -0
  49. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure_ai_evaluation.egg-info/requires.txt +2 -1
  50. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/aoai_score_model_grader_sample.py +61 -7
  51. azure_ai_evaluation-1.11.1/samples/data/custom_objectives_with_context_example.json +51 -0
  52. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/evaluation_samples_evaluate.py +40 -27
  53. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/evaluation_samples_evaluate_fdp.py +7 -0
  54. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/evaluation_samples_threshold.py +16 -16
  55. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/red_team_samples.py +56 -0
  56. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/setup.py +2 -2
  57. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/conftest.py +59 -1
  58. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/converters/ai_agent_converter/serialization_helper.py +6 -1
  59. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +4 -4
  60. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_builtin_evaluators.py +24 -0
  61. azure_ai_evaluation-1.11.1/tests/e2etests/test_red_team.py +379 -0
  62. azure_ai_evaluation-1.11.1/tests/unittests/test_aoai_alignment_missing_rows.py +90 -0
  63. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_aoai_evaluation_pagination.py +13 -5
  64. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_aoai_score_model_grader.py +20 -3
  65. azure_ai_evaluation-1.11.1/tests/unittests/test_built_in_evaluator.py +254 -0
  66. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_eval_run.py +291 -1
  67. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluate.py +315 -1
  68. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_attack_objective_generator.py +4 -0
  69. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_callback_chat_target.py +77 -1
  70. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +1 -1
  71. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_red_team.py +203 -209
  72. azure_ai_evaluation-1.11.1/tests/unittests/test_redteam/test_red_team_language_support.py +213 -0
  73. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_strategy_utils.py +61 -1
  74. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_tool_call_accuracy_evaluator.py +295 -7
  75. azure_ai_evaluation-1.10.0/azure/ai/evaluation/red_team/_red_team.py +0 -3534
  76. azure_ai_evaluation-1.10.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  77. azure_ai_evaluation-1.10.0/tests/unittests/test_built_in_evaluator.py +0 -130
  78. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/MANIFEST.in +0 -0
  79. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/NOTICE.txt +0 -0
  80. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/README.md +0 -0
  81. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/TROUBLESHOOTING.md +0 -0
  82. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/__init__.py +0 -0
  83. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/__init__.py +0 -0
  84. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/__init__.py +0 -0
  85. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/__init__.py +0 -0
  86. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/aoai_grader.py +0 -0
  87. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/label_grader.py +0 -0
  88. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/python_grader.py +0 -0
  89. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/score_model_grader.py +0 -0
  90. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/string_check_grader.py +0 -0
  91. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_aoai/text_similarity_grader.py +0 -0
  92. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_azure/__init__.py +0 -0
  93. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_azure/_clients.py +0 -0
  94. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_azure/_envs.py +0 -0
  95. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_azure/_models.py +0 -0
  96. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  97. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/__init__.py +0 -0
  98. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/_experimental.py +0 -0
  99. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/constants.py +0 -0
  100. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/evaluation_onedp_client.py +0 -0
  101. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/math.py +0 -0
  102. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/__init__.py +0 -0
  103. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_client.py +0 -0
  104. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_configuration.py +0 -0
  105. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
  106. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_patch.py +0 -0
  107. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
  108. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
  109. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_utils/__init__.py +0 -0
  110. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_utils/model_base.py +0 -0
  111. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_utils/serialization.py +0 -0
  112. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_validation.py +0 -0
  113. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
  114. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/_version.py +0 -0
  115. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/__init__.py +0 -0
  116. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/_client.py +0 -0
  117. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +0 -0
  118. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/_patch.py +0 -0
  119. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +0 -0
  120. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +0 -0
  121. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +0 -0
  122. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/models/__init__.py +0 -0
  123. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/models/_enums.py +0 -0
  124. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/models/_patch.py +0 -0
  125. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/operations/__init__.py +0 -0
  126. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/operations/_operations.py +0 -0
  127. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -0
  128. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
  129. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
  130. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
  131. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
  132. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
  133. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
  134. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
  135. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
  136. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
  137. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
  138. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
  139. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
  140. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
  141. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
  142. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
  143. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
  144. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
  145. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/rai_service.py +0 -0
  146. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
  147. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  148. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  149. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  150. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  151. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  152. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
  153. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  154. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  155. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  156. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  157. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  158. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  159. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  160. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  161. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  162. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  163. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  164. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  165. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
  166. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  167. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
  168. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_common/utils.py +0 -0
  169. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_constants.py +0 -0
  170. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_converters/__init__.py +0 -0
  171. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_converters/_sk_services.py +0 -0
  172. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_eval_mapping.py +0 -0
  173. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  174. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  175. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +0 -0
  176. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  177. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  178. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  179. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  180. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  181. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  182. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  183. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  184. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
  185. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  186. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
  187. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  188. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  189. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  190. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
  191. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  192. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  193. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
  194. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
  195. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
  196. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
  197. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
  198. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +0 -0
  199. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +0 -0
  200. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  201. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  202. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  203. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
  204. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  205. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  206. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  207. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  208. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  209. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  210. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  211. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  212. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
  213. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  214. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  215. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  216. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  217. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  218. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
  219. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  220. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  221. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  222. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
  223. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  224. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  225. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  226. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
  227. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  228. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  229. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  230. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  231. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  232. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
  233. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  234. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  235. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
  236. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  237. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  238. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_http_utils.py +0 -0
  239. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/__init__.py +0 -0
  240. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
  241. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
  242. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
  243. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
  244. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
  245. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
  246. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
  247. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
  248. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
  249. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
  250. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
  251. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
  252. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  253. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
  254. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +0 -0
  255. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  256. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
  257. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +0 -0
  258. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
  259. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  260. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +0 -0
  261. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
  262. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
  263. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
  264. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  265. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_common/__init__.py +0 -0
  266. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +0 -0
  267. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
  268. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +0 -0
  269. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  270. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
  271. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
  272. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
  273. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
  274. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  275. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_model_configurations.py +0 -0
  276. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
  277. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  278. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
  279. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_user_agent.py +0 -0
  280. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_vendor/__init__.py +0 -0
  281. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  282. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  283. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  284. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  285. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  286. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/py.typed +0 -0
  287. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_agent/__init__.py +0 -0
  288. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +0 -0
  289. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +0 -0
  290. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +0 -0
  291. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +0 -0
  292. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_attack_strategy.py +0 -0
  293. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
  294. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_red_team_result.py +0 -0
  295. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +0 -0
  296. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/constants.py +0 -0
  297. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
  298. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/__init__.py +0 -0
  299. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  300. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_constants.py +0 -0
  301. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
  302. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
  303. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  304. {azure_ai_evaluation-1.10.0/azure/ai/evaluation/red_team/_utils → azure_ai_evaluation-1.11.1/azure/ai/evaluation/simulator/_data_sources}/__init__.py +0 -0
  305. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  306. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  307. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  308. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  309. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  310. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
  311. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  312. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  313. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  314. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
  315. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  316. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  317. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  318. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  319. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_simulator.py +0 -0
  320. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure/ai/evaluation/simulator/_utils.py +0 -0
  321. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  322. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  323. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  324. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/migration_guide.md +0 -0
  325. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/pyproject.toml +0 -0
  326. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/README.md +0 -0
  327. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
  328. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/instructions.md +0 -0
  329. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  330. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/response_completeness.ipynb +0 -0
  331. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  332. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/task_adherence.ipynb +0 -0
  333. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
  334. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/agent_evaluators/user_functions.py +0 -0
  335. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/data/evaluate_test_data.jsonl +0 -0
  336. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/evaluation_samples_common.py +0 -0
  337. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/evaluation_samples_safety_evaluation.py +0 -0
  338. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/evaluation_samples_simulate.py +0 -0
  339. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/red_team_agent_tool_sample.py +0 -0
  340. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/red_team_skip_upload.py +0 -0
  341. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/samples/semantic_kernel_red_team_agent_sample.py +0 -0
  342. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/setup.cfg +0 -0
  343. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/__init__.py +0 -0
  344. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/__openai_patcher.py +0 -0
  345. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
  346. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +0 -0
  347. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +0 -0
  348. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/__init__.py +0 -0
  349. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  350. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/target_fn.py +0 -0
  351. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_adv_simulator.py +0 -0
  352. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_aoai_graders.py +0 -0
  353. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_evaluate.py +0 -0
  354. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_lite_management_client.py +0 -0
  355. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_mass_evaluate.py +0 -0
  356. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_metrics_upload.py +0 -0
  357. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_prompty_async.py +0 -0
  358. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_remote_evaluation.py +0 -0
  359. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/e2etests/test_sim_and_eval.py +0 -0
  360. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_agent_evaluators.py +0 -0
  361. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_aoai_integration_features.py +0 -0
  362. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_aoai_python_grader.py +0 -0
  363. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_batch_run_context.py +0 -0
  364. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_completeness_evaluator.py +0 -0
  365. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  366. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_content_safety_rai_script.py +0 -0
  367. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_document_retrieval_evaluator.py +0 -0
  368. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluate_mismatch.py +0 -0
  369. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluate_performance.py +0 -0
  370. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  371. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
  372. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  373. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
  374. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
  375. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_jailbreak_simulator.py +0 -0
  376. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_lazy_imports.py +0 -0
  377. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_non_adv_simulator.py +0 -0
  378. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/__init__.py +0 -0
  379. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_attack_strategy.py +0 -0
  380. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_constants.py +0 -0
  381. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_formatting_utils.py +0 -0
  382. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_rai_service_target.py +0 -0
  383. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +0 -0
  384. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_redteam/test_red_team_result.py +0 -0
  385. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_remote_evaluation_features.py +0 -0
  386. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_safety_evaluation.py +0 -0
  387. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_save_eval.py +0 -0
  388. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_simulator.py +0 -0
  389. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  390. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  391. {azure_ai_evaluation-1.10.0 → azure_ai_evaluation-1.11.1}/tests/unittests/test_utils.py +0 -0
@@ -1,5 +1,26 @@
1
1
  # Release History
2
2
 
3
+ ## 1.11.1 (2025-09-17)
4
+
5
+ ### Bugs Fixed
6
+ - Pinned the duckdb version to 1.3.2 for the `redteam` extra to fix the error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`.
7
+
8
+ ## 1.11.0 (2025-09-02)
9
+
10
+ ### Features Added
11
+ - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
12
+ - Added support for user-supplied TokenCredentials with LLM-based evaluators.
13
+ - Enhanced `GroundednessEvaluator` to support AI agent evaluation with tool calls. The evaluator now accepts agent response data containing tool calls and can extract context from `file_search` tool results for groundedness assessment. This enables evaluation of AI agents that use tools to retrieve information and generate responses. Note: Agent groundedness evaluation is currently supported only when the `file_search` tool is used.
14
+ - Added `language` parameter to `RedTeam` class for multilingual red team scanning support. The parameter accepts values from `SupportedLanguages` enum including English, Spanish, French, German, Italian, Portuguese, Japanese, Korean, and Simplified Chinese, enabling red team attacks to be generated and conducted in multiple languages.
15
+ - Added support for IndirectAttack and UngroundedAttributes risk categories in `RedTeam` scanning. These new risk categories expand red team capabilities to detect cross-platform indirect attacks and evaluate ungrounded inferences about human attributes including emotional state and protected class information.
16
+
17
+ ### Bugs Fixed
18
+ - Fixed issue where evaluation results were not properly aligned with input data, leading to incorrect metrics being reported.
19
+
20
+ ### Other Changes
21
+ - Deprecating `AdversarialSimulator` in favor of the [AI Red Teaming Agent](https://aka.ms/airedteamingagent-sample). `AdversarialSimulator` will be removed in the next minor release.
22
+ - Moved retry configuration constants (`MAX_RETRY_ATTEMPTS`, `MAX_RETRY_WAIT_SECONDS`, `MIN_RETRY_WAIT_SECONDS`) from `RedTeam` class to new `RetryManager` class for better code organization and configurability.
23
+
3
24
  ## 1.10.0 (2025-07-31)
4
25
 
5
26
  ### Breaking Changes
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: azure-ai-evaluation
3
- Version: 1.10.0
3
+ Version: 1.11.1
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -28,13 +28,28 @@ Requires-Dist: nltk>=3.9.1
28
28
  Requires-Dist: azure-storage-blob>=12.10.0
29
29
  Requires-Dist: httpx>=0.25.1
30
30
  Requires-Dist: pandas<3.0.0,>=2.1.2
31
- Requires-Dist: openai>=1.78.0
31
+ Requires-Dist: openai>=1.108.0
32
32
  Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
33
33
  Requires-Dist: msrest>=0.6.21
34
34
  Requires-Dist: Jinja2>=3.1.6
35
35
  Requires-Dist: aiohttp>=3.0
36
36
  Provides-Extra: redteam
37
37
  Requires-Dist: pyrit==0.8.1; extra == "redteam"
38
+ Requires-Dist: duckdb==1.3.2; extra == "redteam"
39
+ Dynamic: author
40
+ Dynamic: author-email
41
+ Dynamic: classifier
42
+ Dynamic: description
43
+ Dynamic: description-content-type
44
+ Dynamic: home-page
45
+ Dynamic: keywords
46
+ Dynamic: license
47
+ Dynamic: license-file
48
+ Dynamic: project-url
49
+ Dynamic: provides-extra
50
+ Dynamic: requires-dist
51
+ Dynamic: requires-python
52
+ Dynamic: summary
38
53
 
39
54
  # Azure AI Evaluation client library for Python
40
55
 
@@ -398,6 +413,27 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
398
413
 
399
414
  # Release History
400
415
 
416
+ ## 1.11.1 (2025-09-17)
417
+
418
+ ### Bugs Fixed
419
+ - Pinned the duckdb version to 1.3.2 for the `redteam` extra to fix the error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`.
420
+
421
+ ## 1.11.0 (2025-09-02)
422
+
423
+ ### Features Added
424
+ - Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
425
+ - Added support for user-supplied TokenCredentials with LLM-based evaluators.
426
+ - Enhanced `GroundednessEvaluator` to support AI agent evaluation with tool calls. The evaluator now accepts agent response data containing tool calls and can extract context from `file_search` tool results for groundedness assessment. This enables evaluation of AI agents that use tools to retrieve information and generate responses. Note: Agent groundedness evaluation is currently supported only when the `file_search` tool is used.
427
+ - Added `language` parameter to `RedTeam` class for multilingual red team scanning support. The parameter accepts values from `SupportedLanguages` enum including English, Spanish, French, German, Italian, Portuguese, Japanese, Korean, and Simplified Chinese, enabling red team attacks to be generated and conducted in multiple languages.
428
+ - Added support for IndirectAttack and UngroundedAttributes risk categories in `RedTeam` scanning. These new risk categories expand red team capabilities to detect cross-platform indirect attacks and evaluate ungrounded inferences about human attributes including emotional state and protected class information.
429
+
430
+ ### Bugs Fixed
431
+ - Fixed issue where evaluation results were not properly aligned with input data, leading to incorrect metrics being reported.
432
+
433
+ ### Other Changes
434
+ - Deprecating `AdversarialSimulator` in favor of the [AI Red Teaming Agent](https://aka.ms/airedteamingagent-sample). `AdversarialSimulator` will be removed in the next minor release.
435
+ - Moved retry configuration constants (`MAX_RETRY_ATTEMPTS`, `MAX_RETRY_WAIT_SECONDS`, `MIN_RETRY_WAIT_SECONDS`) from `RedTeam` class to new `RetryManager` class for better code organization and configurability.
436
+
401
437
  ## 1.10.0 (2025-07-31)
402
438
 
403
439
  ### Breaking Changes
@@ -1961,12 +1961,16 @@ class Message(_Model):
1961
1961
  :vartype role: str
1962
1962
  :ivar content: The content.
1963
1963
  :vartype content: str
1964
+ :ivar context: The context.
1965
+ :vartype context: str
1964
1966
  """
1965
1967
 
1966
1968
  role: Optional[str] = rest_field(name="Role", visibility=["read", "create", "update", "delete", "query"])
1967
1969
  """The role."""
1968
1970
  content: Optional[str] = rest_field(name="Content", visibility=["read", "create", "update", "delete", "query"])
1969
1971
  """The content."""
1972
+ context: Optional[str] = rest_field(name="Context", visibility=["read", "create", "update", "delete", "query"])
1973
+ """The context."""
1970
1974
 
1971
1975
  @overload
1972
1976
  def __init__(
@@ -1974,6 +1978,7 @@ class Message(_Model):
1974
1978
  *,
1975
1979
  role: Optional[str] = None,
1976
1980
  content: Optional[str] = None,
1981
+ context: Optional[str] = None,
1977
1982
  ) -> None: ...
1978
1983
 
1979
1984
  @overload
@@ -11,7 +11,18 @@ from azure.ai.evaluation._common._experimental import experimental
11
11
  from packaging.version import Version
12
12
 
13
13
  # Constants.
14
- from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION, _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS
14
+ from ._models import (
15
+ _USER,
16
+ _AGENT,
17
+ _TOOL,
18
+ _TOOL_CALL,
19
+ _TOOL_CALLS,
20
+ _FUNCTION,
21
+ _BUILT_IN_DESCRIPTIONS,
22
+ _BUILT_IN_PARAMS,
23
+ _OPENAPI,
24
+ OpenAPIToolDefinition,
25
+ )
15
26
 
16
27
  # Message instances.
17
28
  from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall
@@ -93,7 +104,7 @@ class AIAgentConverter:
93
104
  return tool_calls_chronological
94
105
 
95
106
  @staticmethod
96
- def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinition]:
107
+ def _extract_function_tool_definitions(thread_run: object) -> List[Union[ToolDefinition, OpenAPIToolDefinition]]:
97
108
  """
98
109
  Extracts tool definitions from a thread run.
99
110
 
@@ -121,6 +132,26 @@ class AIAgentConverter:
121
132
  parameters=parameters,
122
133
  )
123
134
  )
135
+ elif tool.type == _OPENAPI:
136
+ openapi_tool = tool.openapi
137
+ tool_definition = OpenAPIToolDefinition(
138
+ name=openapi_tool.name,
139
+ description=openapi_tool.description,
140
+ type=_OPENAPI,
141
+ spec=openapi_tool.spec,
142
+ auth=openapi_tool.auth.as_dict(),
143
+ default_params=openapi_tool.default_params.as_dict() if openapi_tool.default_params else None,
144
+ functions=[
145
+ ToolDefinition(
146
+ name=func.get("name"),
147
+ description=func.get("description"),
148
+ parameters=func.get("parameters"),
149
+ type="function",
150
+ )
151
+ for func in openapi_tool.get("functions")
152
+ ],
153
+ )
154
+ final_tools.append(tool_definition)
124
155
  else:
125
156
  # Add limited support for built-in tools. Descriptions and parameters
126
157
  # are not published, but we'll include placeholders.
@@ -243,16 +274,30 @@ class AIAgentConverter:
243
274
  if len(single_turn.content) < 1:
244
275
  continue
245
276
 
246
- # Build the content of the text message.
247
- content = {
248
- "type": "text",
249
- "text": single_turn.content[0].text.value,
250
- }
277
+ content_list = []
278
+ # If content is a list, process all content items.
279
+ for content_item in single_turn.content:
280
+ if content_item.type == "text":
281
+ content_list.append(
282
+ {
283
+ "type": "text",
284
+ "text": content_item.text.value,
285
+ }
286
+ )
287
+ elif content_item.type == "image":
288
+ content_list.append(
289
+ {
290
+ "type": "image",
291
+ "image": {
292
+ "file_id": content_item.image_file.file_id,
293
+ },
294
+ }
295
+ )
251
296
 
252
297
  # If we have a user message, then we save it as such and since it's a human message, there is no
253
298
  # run_id associated with it.
254
299
  if single_turn.role == _USER:
255
- final_messages.append(UserMessage(content=[content], createdAt=single_turn.created_at))
300
+ final_messages.append(UserMessage(content=content_list, createdAt=single_turn.created_at))
256
301
  continue
257
302
 
258
303
  # In this case, we have an assistant message. Unfortunately, this would only have the user-facing
@@ -261,7 +306,7 @@ class AIAgentConverter:
261
306
  if single_turn.role == _AGENT:
262
307
  # We are required to put the run_id in the assistant message.
263
308
  final_messages.append(
264
- AssistantMessage(content=[content], run_id=single_turn.run_id, createdAt=single_turn.created_at)
309
+ AssistantMessage(content=content_list, run_id=single_turn.run_id, createdAt=single_turn.created_at)
265
310
  )
266
311
  continue
267
312
 
@@ -791,6 +836,7 @@ class LegacyAgentDataRetriever(AIAgentDataRetriever):
791
836
  limit=self._AI_SERVICES_API_MAX_LIMIT,
792
837
  order="asc",
793
838
  after=after,
839
+ include=["step_details.tool_calls[*].file_search.results[*].content"],
794
840
  )
795
841
  has_more = run_steps.has_more
796
842
  after = run_steps.last_id
@@ -838,7 +884,11 @@ class FDPAgentDataRetriever(AIAgentDataRetriever):
838
884
  def _list_run_steps_chronological(self, thread_id: str, run_id: str):
839
885
 
840
886
  return self.project_client.agents.run_steps.list(
841
- thread_id=thread_id, run_id=run_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc"
887
+ thread_id=thread_id,
888
+ run_id=run_id,
889
+ limit=self._AI_SERVICES_API_MAX_LIMIT,
890
+ order="asc",
891
+ include=["step_details.tool_calls[*].file_search.results[*].content"],
842
892
  )
843
893
 
844
894
  def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
@@ -3,17 +3,31 @@ import json
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
- from typing import List, Optional, Union
6
+ from typing import TYPE_CHECKING, Any, List, Optional, Union
7
7
 
8
8
  # Models moved in a later version of agents SDK, so try a few different locations
9
- try:
10
- from azure.ai.projects.models import RunStepFunctionToolCall
11
- except ImportError:
12
- pass
13
- try:
14
- from azure.ai.agents.models import RunStepFunctionToolCall
15
- except ImportError:
16
- pass
9
+ # Only import for type checking to avoid runtime import errors
10
+ if TYPE_CHECKING:
11
+ try:
12
+ from azure.ai.projects.models import RunStepFunctionToolCall
13
+ except ImportError:
14
+ try:
15
+ from azure.ai.agents.models import RunStepFunctionToolCall
16
+ except ImportError:
17
+ # Create a protocol for type checking when the real class isn't available
18
+ from typing import Protocol
19
+
20
+ class RunStepFunctionToolCall(Protocol):
21
+ """Protocol defining the expected interface for RunStepFunctionToolCall."""
22
+
23
+ id: str
24
+ type: str
25
+
26
+ def get(self, key: str, default: Any = None) -> Any: ...
27
+
28
+ else:
29
+ # At runtime, we don't need the actual class since it's only used in type annotations
30
+ RunStepFunctionToolCall = Any
17
31
 
18
32
  # Message roles constants.
19
33
  _SYSTEM = "system"
@@ -33,9 +47,12 @@ _TOOL_CALLS = "tool_calls"
33
47
  # Constants to only be used internally in this file for the built-in tools.
34
48
  _CODE_INTERPRETER = "code_interpreter"
35
49
  _BING_GROUNDING = "bing_grounding"
50
+ _BING_CUSTOM_SEARCH = "bing_custom_search"
36
51
  _FILE_SEARCH = "file_search"
37
52
  _AZURE_AI_SEARCH = "azure_ai_search"
53
+ _SHAREPOINT_GROUNDING = "sharepoint_grounding"
38
54
  _FABRIC_DATAAGENT = "fabric_dataagent"
55
+ _OPENAPI = "openapi"
39
56
 
40
57
  # Built-in tool descriptions and parameters are hidden, but we include basic descriptions
41
58
  # for evaluation purposes.
@@ -44,8 +61,10 @@ _BUILT_IN_DESCRIPTIONS = {
44
61
  + "generate code, and create graphs and charts using your data. Supports "
45
62
  + "up to 20 files.",
46
63
  _BING_GROUNDING: "Enhance model output with web data.",
47
- _FILE_SEARCH: "Search for data across uploaded files.",
64
+ _BING_CUSTOM_SEARCH: "Enables agents to retrieve content from a curated subset of websites, enhancing relevance and reducing noise from public web searches.",
65
+ _FILE_SEARCH: "Search for data across uploaded files. A single call can return multiple results/files in the 'results' field.",
48
66
  _AZURE_AI_SEARCH: "Search an Azure AI Search index for relevant data.",
67
+ _SHAREPOINT_GROUNDING: "Allows agents to access and retrieve relevant content from Microsoft SharePoint document libraries, grounding responses in organizational knowledge.",
49
68
  _FABRIC_DATAAGENT: "Connect to Microsoft Fabric data agents to retrieve data across different data sources.",
50
69
  }
51
70
 
@@ -59,6 +78,15 @@ _BUILT_IN_PARAMS = {
59
78
  "type": "object",
60
79
  "properties": {"requesturl": {"type": "string", "description": "URL used in Bing Search API."}},
61
80
  },
81
+ _BING_CUSTOM_SEARCH: {
82
+ "type": "object",
83
+ "properties": {
84
+ "requesturl": {
85
+ "type": "string",
86
+ "description": "Search queries, along with pre-configured site restrictions or domain filters.",
87
+ }
88
+ },
89
+ },
62
90
  _FILE_SEARCH: {
63
91
  "type": "object",
64
92
  "properties": {
@@ -76,6 +104,12 @@ _BUILT_IN_PARAMS = {
76
104
  "type": "object",
77
105
  "properties": {"input": {"type": "string", "description": "Search terms to use."}},
78
106
  },
107
+ _SHAREPOINT_GROUNDING: {
108
+ "type": "object",
109
+ "properties": {
110
+ "input": {"type": "string", "description": "A natural language query to search SharePoint content."}
111
+ },
112
+ },
79
113
  _FABRIC_DATAAGENT: {
80
114
  "type": "object",
81
115
  "properties": {"input": {"type": "string", "description": "Search terms to use."}},
@@ -217,6 +251,27 @@ class ToolDefinition(BaseModel):
217
251
  parameters: dict
218
252
 
219
253
 
254
+ class OpenAPIToolDefinition(BaseModel):
255
+ """Represents an OpenAPI tool definition that will be used in the agent.
256
+ :param name: The name of the tool.
257
+ :type name: str
258
+ :param type: The type of the tool.
259
+ :type type: str
260
+ :param description: A description of the tool.
261
+ :type description: str
262
+ :param functions: The function definitions associated with the tool.
262
+ :type functions: list[ToolDefinition]
264
+ """
265
+
266
+ name: str
267
+ type: str
268
+ description: Optional[str] = None
269
+ spec: object
270
+ auth: object
271
+ default_params: Optional[list[str]] = None
272
+ functions: list[ToolDefinition]
273
+
274
+
220
275
  class ToolCall:
221
276
  """Represents a tool call, used as an intermediate step in the conversion process.
222
277
 
@@ -247,7 +302,7 @@ class EvaluatorData(BaseModel):
247
302
 
248
303
  query: List[Message]
249
304
  response: List[Message]
250
- tool_definitions: List[ToolDefinition]
305
+ tool_definitions: List[Union[ToolDefinition, OpenAPIToolDefinition]]
251
306
 
252
307
  def to_json(self):
253
308
  """Converts the result to a JSON string.
@@ -277,14 +332,16 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
277
332
  # all in most of the cases, and bing would only show the API URL, without arguments or results.
278
333
  # Bing grounding would have "bing_grounding" in details with "requesturl" that will just be the API path with query.
279
334
  # TODO: Work with AI Services to add converter support for BingGrounding and CodeInterpreter.
280
- if hasattr(tool_call.details, _FUNCTION):
335
+ if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
281
336
  # This is the internals of the content object that will be included with the tool call.
282
337
  tool_call_id = tool_call.details.id
283
338
  content_tool_call = {
284
339
  "type": _TOOL_CALL,
285
340
  "tool_call_id": tool_call_id,
286
- "name": tool_call.details.function.name,
287
- "arguments": safe_loads(tool_call.details.function.arguments),
341
+ "name": tool_call.details.get(_FUNCTION).get("name") if tool_call.details.get(_FUNCTION) else None,
342
+ "arguments": safe_loads(
343
+ tool_call.details.get(_FUNCTION).get("arguments") if tool_call.details.get(_FUNCTION) else None
344
+ ),
288
345
  }
289
346
  else:
290
347
  # Treat built-in tools separately. Object models may be unique so handle each case separately
@@ -322,27 +379,19 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
322
379
  # assistant's action of calling the tool.
323
380
  messages.append(AssistantMessage(run_id=run_id, content=[to_dict(content_tool_call)], createdAt=tool_call.created))
324
381
 
325
- if hasattr(tool_call.details, _FUNCTION):
326
- output = safe_loads(tool_call.details.function["output"])
382
+ if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
383
+ output = safe_loads(tool_call.details.get("function")["output"])
327
384
  else:
328
385
  try:
329
386
  # Some built-ins may have output, others may not
330
387
  # Try to retrieve it, but if we don't find anything, skip adding the message
331
388
  # Just manually converting to dicts for easy serialization for now rather than custom serializers
332
389
  if tool_call.details.type == _CODE_INTERPRETER:
333
- output = tool_call.details.code_interpreter.outputs
390
+ output = [result.as_dict() for result in tool_call.details.code_interpreter.outputs]
334
391
  elif tool_call.details.type == _BING_GROUNDING:
335
392
  return messages # not supported yet from bing grounding tool
336
393
  elif tool_call.details.type == _FILE_SEARCH:
337
- output = [
338
- {
339
- "file_id": result.file_id,
340
- "file_name": result.file_name,
341
- "score": result.score,
342
- "content": result.content,
343
- }
344
- for result in tool_call.details.file_search.results
345
- ]
394
+ output = [result.as_dict() for result in tool_call.details.file_search.results]
346
395
  elif tool_call.details.type == _AZURE_AI_SEARCH:
347
396
  output = tool_call.details.azure_ai_search["output"]
348
397
  elif tool_call.details.type == _FABRIC_DATAAGENT:
@@ -81,6 +81,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
81
81
  ~azure.ai.evaluation._promptflow.azure._lite_azure_management_client.LiteMLClient
82
82
  :param promptflow_run: The promptflow run used by the
83
83
  :type promptflow_run: Optional[promptflow._sdk.entities.Run]
84
+ :param tags: A dictionary of tags to be added to the evaluation run for tracking and organization purposes.
85
+ :type tags: Optional[Dict[str, str]]
84
86
  """
85
87
 
86
88
  _MAX_RETRIES = 5
@@ -98,6 +100,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
98
100
  workspace_name: str,
99
101
  management_client: LiteMLClient,
100
102
  promptflow_run: Optional[Run] = None,
103
+ tags: Optional[Dict[str, str]] = None,
101
104
  ) -> None:
102
105
  self._tracking_uri: str = tracking_uri
103
106
  self._subscription_id: str = subscription_id
@@ -107,6 +110,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
107
110
  self._is_promptflow_run: bool = promptflow_run is not None
108
111
  self._run_name = run_name
109
112
  self._promptflow_run = promptflow_run
113
+ self._tags = tags or {}
110
114
  self._status = RunStatus.NOT_STARTED
111
115
  self._url_base: Optional[str] = None
112
116
  self._info: Optional[RunInfo] = None
@@ -173,11 +177,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
173
177
  )
174
178
  else:
175
179
  url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/create"
180
+
181
+ # Prepare tags: start with user tags, ensure mlflow.user is set
182
+ run_tags = self._tags.copy()
183
+ if "mlflow.user" not in run_tags:
184
+ run_tags["mlflow.user"] = "azure-ai-evaluation"
185
+
186
+ # Convert tags to MLflow format
187
+ tags_list = [{"key": key, "value": value} for key, value in run_tags.items()]
188
+
176
189
  body = {
177
190
  "experiment_id": "0",
178
191
  "user_id": "azure-ai-evaluation",
179
192
  "start_time": int(time.time() * 1000),
180
- "tags": [{"key": "mlflow.user", "value": "azure-ai-evaluation"}],
193
+ "tags": tags_list,
181
194
  }
182
195
  if self._run_name:
183
196
  body["run_name"] = self._run_name
@@ -464,7 +464,7 @@ def _validate_columns_for_evaluators(
464
464
  )
465
465
 
466
466
 
467
- def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name):
467
+ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name, tags):
468
468
  if data is None:
469
469
  msg = "The 'data' parameter is required for evaluation."
470
470
  raise EvaluationException(
@@ -725,6 +725,7 @@ def evaluate(
725
725
  azure_ai_project: Optional[Union[str, AzureAIProject]] = None,
726
726
  output_path: Optional[Union[str, os.PathLike]] = None,
727
727
  fail_on_evaluator_errors: bool = False,
728
+ tags: Optional[Dict[str, str]] = None,
728
729
  **kwargs,
729
730
  ) -> EvaluationResult:
730
731
  """Evaluates target or data with built-in or custom evaluators. If both target and data are provided,
@@ -757,6 +758,10 @@ def evaluate(
757
758
  Defaults to false, which means that evaluations will continue regardless of failures.
758
759
  If such failures occur, metrics may be missing, and evidence of failures can be found in the evaluation's logs.
759
760
  :paramtype fail_on_evaluator_errors: bool
761
+ :keyword tags: A dictionary of tags to be added to the evaluation run for tracking and organization purposes.
762
+ Keys and values must be strings. For more information about tag limits, see:
763
+ https://learn.microsoft.com/en-us/azure/machine-learning/resource-limits-capacity?view=azureml-api-2#runs
764
+ :paramtype tags: Optional[Dict[str, str]]
760
765
  :keyword user_agent: A string to append to the default user-agent sent with evaluation http requests
761
766
  :paramtype user_agent: Optional[str]
762
767
  :return: Evaluation results.
@@ -793,6 +798,7 @@ def evaluate(
793
798
  azure_ai_project=azure_ai_project,
794
799
  output_path=output_path,
795
800
  fail_on_evaluator_errors=fail_on_evaluator_errors,
801
+ tags=tags,
796
802
  **kwargs,
797
803
  )
798
804
  except Exception as e:
@@ -861,6 +867,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
861
867
  azure_ai_project: Optional[Union[str, AzureAIProject]] = None,
862
868
  output_path: Optional[Union[str, os.PathLike]] = None,
863
869
  fail_on_evaluator_errors: bool = False,
870
+ tags: Optional[Dict[str, str]] = None,
864
871
  **kwargs,
865
872
  ) -> EvaluationResult:
866
873
  if fail_on_evaluator_errors:
@@ -877,6 +884,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
877
884
  azure_ai_project=azure_ai_project,
878
885
  evaluation_name=evaluation_name,
879
886
  fail_on_evaluator_errors=fail_on_evaluator_errors,
887
+ tags=tags,
880
888
  **kwargs,
881
889
  )
882
890
 
@@ -956,7 +964,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
956
964
  name_map = _map_names_to_builtins(evaluators, graders)
957
965
  if is_onedp_project(azure_ai_project):
958
966
  studio_url = _log_metrics_and_instance_results_onedp(
959
- metrics, results_df, azure_ai_project, evaluation_name, name_map, **kwargs
967
+ metrics, results_df, azure_ai_project, evaluation_name, name_map, tags=tags, **kwargs
960
968
  )
961
969
  else:
962
970
  # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
@@ -964,7 +972,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
964
972
  studio_url = None
965
973
  if trace_destination:
966
974
  studio_url = _log_metrics_and_instance_results(
967
- metrics, results_df, trace_destination, None, evaluation_name, name_map, **kwargs
975
+ metrics, results_df, trace_destination, None, evaluation_name, name_map, tags=tags, **kwargs
968
976
  )
969
977
 
970
978
  result_df_dict = results_df.to_dict("records")
@@ -985,6 +993,7 @@ def _preprocess_data(
985
993
  azure_ai_project: Optional[Union[str, AzureAIProject]] = None,
986
994
  evaluation_name: Optional[str] = None,
987
995
  fail_on_evaluator_errors: bool = False,
996
+ tags: Optional[Dict[str, str]] = None,
988
997
  **kwargs,
989
998
  ) -> __ValidatedData:
990
999
  # Process evaluator config to replace ${target.} with ${data.}
@@ -992,7 +1001,7 @@ def _preprocess_data(
992
1001
  evaluator_config = {}
993
1002
 
994
1003
  input_data_df = _validate_and_load_data(
995
- target, data, evaluators_and_graders, output_path, azure_ai_project, evaluation_name
1004
+ target, data, evaluators_and_graders, output_path, azure_ai_project, evaluation_name, tags
996
1005
  )
997
1006
  if target is not None:
998
1007
  _validate_columns_for_target(input_data_df, target)