azure-ai-evaluation 1.11.0__tar.gz → 1.11.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (388):
  1. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/CHANGELOG.md +11 -0
  2. {azure_ai_evaluation-1.11.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.11.2}/PKG-INFO +14 -2
  3. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/utils.py +68 -0
  4. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +27 -2
  5. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +13 -3
  6. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +86 -33
  7. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  8. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +7 -2
  9. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_version.py +1 -1
  10. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2/azure_ai_evaluation.egg-info}/PKG-INFO +14 -2
  11. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/requires.txt +2 -1
  12. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/setup.py +2 -2
  13. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_builtin_evaluators.py +0 -1
  14. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_score_model_grader.py +20 -3
  15. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_built_in_evaluator.py +2 -24
  16. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate.py +76 -0
  17. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/MANIFEST.in +0 -0
  18. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/NOTICE.txt +0 -0
  19. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/README.md +0 -0
  20. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/TROUBLESHOOTING.md +0 -0
  21. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/__init__.py +0 -0
  22. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/__init__.py +0 -0
  23. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/__init__.py +0 -0
  24. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/__init__.py +0 -0
  25. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/aoai_grader.py +0 -0
  26. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/label_grader.py +0 -0
  27. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/python_grader.py +0 -0
  28. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/score_model_grader.py +0 -0
  29. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/string_check_grader.py +0 -0
  30. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/text_similarity_grader.py +0 -0
  31. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/__init__.py +0 -0
  32. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_clients.py +0 -0
  33. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_envs.py +0 -0
  34. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_models.py +0 -0
  35. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  36. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/__init__.py +0 -0
  37. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/_experimental.py +0 -0
  38. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/constants.py +0 -0
  39. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/evaluation_onedp_client.py +0 -0
  40. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/math.py +0 -0
  41. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/__init__.py +0 -0
  42. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_client.py +0 -0
  43. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_configuration.py +0 -0
  44. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
  45. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_patch.py +0 -0
  46. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
  47. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
  48. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_utils/__init__.py +0 -0
  49. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_utils/model_base.py +0 -0
  50. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_utils/serialization.py +0 -0
  51. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_validation.py +0 -0
  52. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
  53. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_version.py +0 -0
  54. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/__init__.py +0 -0
  55. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/_client.py +0 -0
  56. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +0 -0
  57. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/_patch.py +0 -0
  58. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +0 -0
  59. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +0 -0
  60. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +0 -0
  61. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/__init__.py +0 -0
  62. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/_enums.py +0 -0
  63. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/_models.py +0 -0
  64. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/_patch.py +0 -0
  65. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/operations/__init__.py +0 -0
  66. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/operations/_operations.py +0 -0
  67. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -0
  68. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
  69. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
  70. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
  71. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
  72. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
  73. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
  74. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
  75. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
  76. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
  77. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
  78. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
  79. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
  80. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
  81. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
  82. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
  83. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
  84. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
  85. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/rai_service.py +0 -0
  86. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
  87. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  88. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  89. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  90. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  91. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  92. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
  93. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  94. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  95. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  96. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  97. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  98. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  99. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  100. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  101. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  102. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  103. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  104. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  105. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
  106. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  107. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
  108. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_constants.py +0 -0
  109. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/__init__.py +0 -0
  110. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/_ai_services.py +0 -0
  111. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/_models.py +0 -0
  112. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/_sk_services.py +0 -0
  113. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_eval_mapping.py +0 -0
  114. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  115. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  116. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +0 -0
  117. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  118. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  119. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  120. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  121. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  122. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
  123. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_evaluate.py +0 -0
  124. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  125. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_utils.py +0 -0
  126. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  127. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  128. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
  129. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  130. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
  131. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  132. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
  133. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  134. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  135. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
  136. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
  137. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
  138. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  139. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  140. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
  141. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
  142. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
  143. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
  144. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
  145. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +0 -0
  146. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +0 -0
  147. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  148. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  149. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  150. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
  151. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  152. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
  153. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  154. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  155. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  156. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  157. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  158. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  159. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +0 -0
  160. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
  161. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  162. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  163. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  164. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  165. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  166. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
  167. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  168. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
  169. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  170. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  171. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +0 -0
  172. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
  173. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  174. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
  175. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  176. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  177. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
  178. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  179. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  180. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  181. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
  182. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  183. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  184. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +0 -0
  185. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
  186. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  187. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +0 -0
  188. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
  189. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  190. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
  191. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  192. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  193. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_exceptions.py +0 -0
  194. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_http_utils.py +0 -0
  195. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/__init__.py +0 -0
  196. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
  197. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
  198. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
  199. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
  200. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
  201. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
  202. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
  203. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
  204. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
  205. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
  206. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
  207. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
  208. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  209. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
  210. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  211. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
  212. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +0 -0
  213. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
  214. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  215. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +0 -0
  216. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
  217. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
  218. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
  219. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  220. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/__init__.py +0 -0
  221. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +0 -0
  222. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
  223. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +0 -0
  224. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  225. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
  226. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
  227. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
  228. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
  229. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  230. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_model_configurations.py +0 -0
  231. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
  232. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  233. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
  234. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_user_agent.py +0 -0
  235. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/__init__.py +0 -0
  236. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  237. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  238. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  239. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  240. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  241. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/py.typed +0 -0
  242. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/__init__.py +0 -0
  243. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/__init__.py +0 -0
  244. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +0 -0
  245. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +0 -0
  246. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +0 -0
  247. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +0 -0
  248. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_attack_objective_generator.py +0 -0
  249. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_attack_strategy.py +0 -0
  250. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_callback_chat_target.py +0 -0
  251. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
  252. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_evaluation_processor.py +0 -0
  253. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_mlflow_integration.py +0 -0
  254. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_orchestrator_manager.py +0 -0
  255. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_red_team.py +0 -0
  256. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_red_team_result.py +0 -0
  257. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_result_processor.py +0 -0
  258. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/__init__.py +0 -0
  259. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +0 -0
  260. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +0 -0
  261. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +0 -0
  262. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/constants.py +0 -0
  263. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/exception_utils.py +0 -0
  264. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/file_utils.py +0 -0
  265. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +0 -0
  266. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
  267. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/metric_mapping.py +0 -0
  268. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/progress_utils.py +0 -0
  269. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/retry_utils.py +0 -0
  270. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +0 -0
  271. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/__init__.py +0 -0
  272. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  273. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_adversarial_simulator.py +0 -0
  274. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_constants.py +0 -0
  275. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
  276. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
  277. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  278. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -0
  279. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  280. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  281. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  282. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  283. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  284. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
  285. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  286. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +0 -0
  287. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  288. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +0 -0
  289. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  290. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
  291. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  292. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  293. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  294. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  295. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_simulator.py +0 -0
  296. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_utils.py +0 -0
  297. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/SOURCES.txt +0 -0
  298. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  299. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  300. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  301. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/migration_guide.md +0 -0
  302. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/pyproject.toml +0 -0
  303. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/README.md +0 -0
  304. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
  305. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/instructions.md +0 -0
  306. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  307. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/response_completeness.ipynb +0 -0
  308. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  309. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/task_adherence.ipynb +0 -0
  310. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
  311. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/user_functions.py +0 -0
  312. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/aoai_score_model_grader_sample.py +0 -0
  313. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/data/custom_objectives_with_context_example.json +0 -0
  314. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/data/evaluate_test_data.jsonl +0 -0
  315. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_common.py +0 -0
  316. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_evaluate.py +0 -0
  317. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_evaluate_fdp.py +0 -0
  318. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_safety_evaluation.py +0 -0
  319. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_simulate.py +0 -0
  320. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_threshold.py +0 -0
  321. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/red_team_agent_tool_sample.py +0 -0
  322. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/red_team_samples.py +0 -0
  323. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/red_team_skip_upload.py +0 -0
  324. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/samples/semantic_kernel_red_team_agent_sample.py +0 -0
  325. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/setup.cfg +0 -0
  326. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/__init__.py +0 -0
  327. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/__openai_patcher.py +0 -0
  328. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/conftest.py +0 -0
  329. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
  330. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
  331. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
  332. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +0 -0
  333. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +0 -0
  334. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/__init__.py +0 -0
  335. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  336. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/target_fn.py +0 -0
  337. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_adv_simulator.py +0 -0
  338. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_aoai_graders.py +0 -0
  339. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_evaluate.py +0 -0
  340. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_lite_management_client.py +0 -0
  341. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_mass_evaluate.py +0 -0
  342. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_metrics_upload.py +0 -0
  343. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_prompty_async.py +0 -0
  344. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_red_team.py +0 -0
  345. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_remote_evaluation.py +0 -0
  346. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_sim_and_eval.py +0 -0
  347. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_agent_evaluators.py +0 -0
  348. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_alignment_missing_rows.py +0 -0
  349. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_evaluation_pagination.py +0 -0
  350. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_integration_features.py +0 -0
  351. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_python_grader.py +0 -0
  352. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_batch_run_context.py +0 -0
  353. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_completeness_evaluator.py +0 -0
  354. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  355. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_content_safety_rai_script.py +0 -0
  356. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_document_retrieval_evaluator.py +0 -0
  357. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_eval_run.py +0 -0
  358. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate_mismatch.py +0 -0
  359. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate_performance.py +0 -0
  360. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  361. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
  362. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  363. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
  364. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
  365. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_jailbreak_simulator.py +0 -0
  366. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_lazy_imports.py +0 -0
  367. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_non_adv_simulator.py +0 -0
  368. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/__init__.py +0 -0
  369. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_attack_objective_generator.py +0 -0
  370. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_attack_strategy.py +0 -0
  371. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_callback_chat_target.py +0 -0
  372. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_constants.py +0 -0
  373. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_formatting_utils.py +0 -0
  374. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +0 -0
  375. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_rai_service_target.py +0 -0
  376. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +0 -0
  377. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_red_team.py +0 -0
  378. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_red_team_language_support.py +0 -0
  379. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_red_team_result.py +0 -0
  380. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_strategy_utils.py +0 -0
  381. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_remote_evaluation_features.py +0 -0
  382. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_safety_evaluation.py +0 -0
  383. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_save_eval.py +0 -0
  384. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_simulator.py +0 -0
  385. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  386. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  387. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_tool_call_accuracy_evaluator.py +0 -0
  388. {azure_ai_evaluation-1.11.0 → azure_ai_evaluation-1.11.2}/tests/unittests/test_utils.py +0 -0
@@ -1,5 +1,16 @@
1
1
  # Release History
2
2
 
3
+ ## 1.11.2 (2025-10-09)
4
+
5
+ ### Bugs Fixed
6
+
7
+ - **kwargs in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
8
+
9
+ ## 1.11.1 (2025-09-17)
10
+
11
+ ### Bugs Fixed
12
+ - Pinning duckdb version to 1.3.2 for redteam extra to fix error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`
13
+
3
14
  ## 1.11.0 (2025-09-02)
4
15
 
5
16
  ### Features Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: azure-ai-evaluation
3
- Version: 1.11.0
3
+ Version: 1.11.2
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -28,13 +28,14 @@ Requires-Dist: nltk>=3.9.1
28
28
  Requires-Dist: azure-storage-blob>=12.10.0
29
29
  Requires-Dist: httpx>=0.25.1
30
30
  Requires-Dist: pandas<3.0.0,>=2.1.2
31
- Requires-Dist: openai>=1.78.0
31
+ Requires-Dist: openai>=1.108.0
32
32
  Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
33
33
  Requires-Dist: msrest>=0.6.21
34
34
  Requires-Dist: Jinja2>=3.1.6
35
35
  Requires-Dist: aiohttp>=3.0
36
36
  Provides-Extra: redteam
37
37
  Requires-Dist: pyrit==0.8.1; extra == "redteam"
38
+ Requires-Dist: duckdb==1.3.2; extra == "redteam"
38
39
  Dynamic: author
39
40
  Dynamic: author-email
40
41
  Dynamic: classifier
@@ -412,6 +413,17 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
412
413
 
413
414
  # Release History
414
415
 
416
+ ## 1.11.2 (2025-10-09)
417
+
418
+ ### Bugs Fixed
419
+
420
+ - **kwargs in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
421
+
422
+ ## 1.11.1 (2025-09-17)
423
+
424
+ ### Bugs Fixed
425
+ - Pinning duckdb version to 1.3.2 for redteam extra to fix error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`
426
+
415
427
  ## 1.11.0 (2025-09-02)
416
428
 
417
429
  ### Features Added
@@ -659,6 +659,74 @@ def reformat_tool_definitions(tool_definitions, logger=None):
659
659
  return tool_definitions
660
660
 
661
661
 
662
+ def simplify_messages(messages, drop_system=True, drop_tool_calls=False, logger=None):
663
+ """
664
+ Simplify a list of conversation messages by keeping only role and content.
665
+ Optionally filter out system messages and/or tool calls.
666
+
667
+ :param messages: List of message dicts (e.g., from query or response)
668
+ :param drop_system: If True, remove system role messages
669
+ :param drop_tool_calls: If True, remove tool_call items from assistant content
670
+ :return: New simplified list of messages
671
+ """
672
+ if isinstance(messages, str):
673
+ return messages
674
+ try:
675
+ # Validate input is a list
676
+ if not isinstance(messages, list):
677
+ return messages
678
+
679
+ simplified_msgs = []
680
+ for msg in messages:
681
+ # Ensure msg is a dict
682
+ if not isinstance(msg, dict):
683
+ simplified_msgs.append(msg)
684
+ continue
685
+
686
+ role = msg.get("role")
687
+ content = msg.get("content", [])
688
+
689
+ # Drop system message (if should)
690
+ if drop_system and role == "system":
691
+ continue
692
+
693
+ # Simplify user messages
694
+ if role == "user":
695
+ simplified_msg = {
696
+ "role": role,
697
+ "content": _extract_text_from_content(content),
698
+ }
699
+ simplified_msgs.append(simplified_msg)
700
+ continue
701
+
702
+ # Drop tool results (if should)
703
+ if drop_tool_calls and role == "tool":
704
+ continue
705
+
706
+ # Simplify assistant messages
707
+ if role == "assistant":
708
+ simplified_content = _extract_text_from_content(content)
709
+ # Check if message has content
710
+ if simplified_content:
711
+ simplified_msg = {"role": role, "content": simplified_content}
712
+ simplified_msgs.append(simplified_msg)
713
+ continue
714
+
715
+ # Drop tool calls (if should)
716
+ if drop_tool_calls and any(c.get("type") == "tool_call" for c in content if isinstance(c, dict)):
717
+ continue
718
+
719
+ # If we reach here, it means we want to keep the message
720
+ simplified_msgs.append(msg)
721
+
722
+ return simplified_msgs
723
+
724
+ except Exception as ex:
725
+ if logger:
726
+ logger.debug(f"Error simplifying messages: {str(ex)}. Returning original messages.")
727
+ return messages
728
+
729
+
662
730
  def upload(path: str, container_client: ContainerClient, logger=None):
663
731
  """Upload files or directories to Azure Blob Storage using a container client.
664
732
 
@@ -272,8 +272,33 @@ def _get_single_run_results(
272
272
  for row_result in all_results:
273
273
  listed_results["index"].append(row_result.datasource_item_id)
274
274
  for single_grader_row_result in row_result.results:
275
- grader_name = run_info["grader_name_map"][single_grader_row_result["name"]]
276
- for name, value in single_grader_row_result.items():
275
+ if isinstance(single_grader_row_result, dict):
276
+ result_dict = single_grader_row_result
277
+ elif hasattr(single_grader_row_result, "model_dump"):
278
+ result_dict = single_grader_row_result.model_dump()
279
+ elif hasattr(single_grader_row_result, "dict"):
280
+ result_dict = single_grader_row_result.dict()
281
+ elif hasattr(single_grader_row_result, "__dict__"):
282
+ result_dict = vars(single_grader_row_result)
283
+ else:
284
+ raise EvaluationException(
285
+ message=("Unsupported AOAI evaluation result type: " f"{type(single_grader_row_result)!r}."),
286
+ blame=ErrorBlame.UNKNOWN,
287
+ category=ErrorCategory.FAILED_EXECUTION,
288
+ target=ErrorTarget.AOAI_GRADER,
289
+ )
290
+
291
+ grader_result_name = result_dict.get("name", None)
292
+ if grader_result_name is None:
293
+ raise EvaluationException(
294
+ message="AOAI evaluation response missing grader result name; unable to map to original grader.",
295
+ blame=ErrorBlame.UNKNOWN,
296
+ category=ErrorCategory.FAILED_EXECUTION,
297
+ target=ErrorTarget.AOAI_GRADER,
298
+ )
299
+
300
+ grader_name = run_info["grader_name_map"][grader_result_name]
301
+ for name, value in result_dict.items():
277
302
  if name in ["name"]:
278
303
  continue
279
304
  if name.lower() == "passed":
@@ -37,6 +37,8 @@ from azure.ai.evaluation._common._experimental import experimental
37
37
 
38
38
  from ._conversation_aggregators import GetAggregator, GetAggregatorType
39
39
 
40
+ import copy
41
+
40
42
  P = ParamSpec("P")
41
43
  T = TypeVar("T")
42
44
  T_EvalValue = TypeVar("T_EvalValue")
@@ -486,8 +488,12 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
486
488
  """
487
489
  tool_calls = []
488
490
  tool_results_map = {}
489
- if isinstance(response, list):
490
- for message in response:
491
+
492
+ # Work on a deep copy to avoid modifying the original object
493
+ response_copy = copy.deepcopy(response)
494
+
495
+ if isinstance(response_copy, list):
496
+ for message in response_copy:
491
497
  # Extract tool calls from assistant messages
492
498
  if message.get("role") == "assistant" and isinstance(message.get("content"), list):
493
499
  for content_item in message.get("content"):
@@ -519,7 +525,11 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
519
525
  :rtype: Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]
520
526
  """
521
527
  # Convert inputs into list of evaluable inputs.
522
- eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
528
+ try:
529
+ eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
530
+ except Exception as e:
531
+ print(f"Error converting kwargs to eval_input_list: {e}")
532
+ raise e
523
533
  per_turn_results = []
524
534
  # Evaluate all inputs.
525
535
  for eval_input in eval_input_list:
@@ -2,7 +2,7 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
  import os, logging
5
- from typing import Dict, List, Optional, Union
5
+ from typing import Dict, List, Optional, Union, Any, Tuple
6
6
 
7
7
  from typing_extensions import overload, override
8
8
  from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
@@ -16,6 +16,7 @@ from ..._common.utils import (
16
16
  ErrorCategory,
17
17
  construct_prompty_model_config,
18
18
  validate_model_config,
19
+ simplify_messages,
19
20
  )
20
21
 
21
22
  try:
@@ -207,6 +208,42 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
207
208
 
208
209
  return super().__call__(*args, **kwargs)
209
210
 
211
+ def _has_context(self, eval_input: dict) -> bool:
212
+ """
213
+ Return True if eval_input contains a non-empty 'context' field.
214
+ Treats None, empty strings, empty lists, and lists of empty strings as no context.
215
+ """
216
+ context = eval_input.get("context", None)
217
+ if not context:
218
+ return False
219
+ if context == "<>": # Special marker for no context
220
+ return False
221
+ if isinstance(context, list):
222
+ return any(str(c).strip() for c in context)
223
+ if isinstance(context, str):
224
+ return bool(context.strip())
225
+ return True
226
+
227
+ @override
228
+ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
229
+ if "query" not in eval_input:
230
+ return await super()._do_eval(eval_input)
231
+
232
+ contains_context = self._has_context(eval_input)
233
+
234
+ simplified_query = simplify_messages(eval_input["query"], drop_tool_calls=contains_context)
235
+ simplified_response = simplify_messages(eval_input["response"], drop_tool_calls=False)
236
+
237
+ # Build simplified input
238
+ simplified_eval_input = {
239
+ "query": simplified_query,
240
+ "response": simplified_response,
241
+ "context": eval_input["context"],
242
+ }
243
+
244
+ # Replace and call the parent method
245
+ return await super()._do_eval(simplified_eval_input)
246
+
210
247
  async def _real_call(self, **kwargs):
211
248
  """The asynchronous call where real end-to-end evaluation logic is performed.
212
249
 
@@ -230,57 +267,73 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
230
267
  raise ex
231
268
 
232
269
  def _convert_kwargs_to_eval_input(self, **kwargs):
233
- if "context" in kwargs or "conversation" in kwargs:
270
+ if kwargs.get("context") or kwargs.get("conversation"):
234
271
  return super()._convert_kwargs_to_eval_input(**kwargs)
235
-
236
272
  query = kwargs.get("query")
237
273
  response = kwargs.get("response")
238
274
  tool_definitions = kwargs.get("tool_definitions")
239
275
 
240
- if not query or not response or not tool_definitions:
241
- msg = f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query', 'response' and 'tool_definitions' are required."
276
+ if (not query) or (not response): # or not tool_definitions:
277
+ msg = f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query' and 'response' are required."
242
278
  raise EvaluationException(
243
279
  message=msg,
244
280
  blame=ErrorBlame.USER_ERROR,
245
281
  category=ErrorCategory.INVALID_VALUE,
246
282
  target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
247
283
  )
248
-
249
284
  context = self._get_context_from_agent_response(response, tool_definitions)
250
- if not context:
251
- raise EvaluationException(
252
- message=f"Context could not be extracted from agent response. Supported tools for groundedness are {self._SUPPORTED_TOOLS}. If supported tools are not used groundedness is not calculated.",
253
- blame=ErrorBlame.USER_ERROR,
254
- category=ErrorCategory.NOT_APPLICABLE,
255
- target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
256
- )
257
285
 
258
- return super()._convert_kwargs_to_eval_input(response=response[-1], context=context, query=query)
286
+ filtered_response = self._filter_file_search_results(response)
287
+ return super()._convert_kwargs_to_eval_input(response=filtered_response, context=context, query=query)
288
+
289
+ def _filter_file_search_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
290
+ """Filter out file_search tool results from the messages."""
291
+ file_search_ids = self._get_file_search_tool_call_ids(messages)
292
+ return [
293
+ msg for msg in messages if not (msg.get("role") == "tool" and msg.get("tool_call_id") in file_search_ids)
294
+ ]
259
295
 
260
296
  def _get_context_from_agent_response(self, response, tool_definitions):
297
+ """Extract context text from file_search tool results in the agent response."""
298
+ NO_CONTEXT = "<>"
261
299
  context = ""
262
300
  try:
263
301
  logger.debug("Extracting context from response")
264
302
  tool_calls = self._parse_tools_from_response(response=response)
265
- logger.debug(f"Tool Calls parsed successfully : {tool_calls}")
266
- if tool_calls:
267
- for tool_call in tool_calls:
268
- if isinstance(tool_call, dict) and tool_call.get("type") == "tool_call":
269
- tool_name = tool_call.get("name")
270
- for tool in tool_definitions:
271
- if tool.get("name") == tool_name and tool.get("type") in self._SUPPORTED_TOOLS:
272
- if tool_name == "file_search":
273
- tool_result = tool_call.get("tool_result")
274
- if tool_result:
275
- for result in tool_result:
276
- content_list = result.get("content")
277
- if content_list:
278
- for content in content_list:
279
- text = content.get("text")
280
- if text:
281
- context = context + "\n" + str(text)
303
+ logger.debug(f"Tool Calls parsed successfully: {tool_calls}")
304
+
305
+ if not tool_calls:
306
+ return NO_CONTEXT
307
+
308
+ context_lines = []
309
+ for tool_call in tool_calls:
310
+ if not isinstance(tool_call, dict) or tool_call.get("type") != "tool_call":
311
+ continue
312
+
313
+ tool_name = tool_call.get("name")
314
+ if tool_name != "file_search":
315
+ continue
316
+
317
+ # Extract tool results
318
+ for result in tool_call.get("tool_result", []):
319
+ results = result if isinstance(result, list) else [result]
320
+ for r in results:
321
+ file_name = r.get("file_name", "Unknown file name")
322
+ for content in r.get("content", []):
323
+ text = content.get("text")
324
+ if text:
325
+ context_lines.append(f"{file_name}:\n- {text}---\n\n")
326
+
327
+ context = "\n".join(context_lines) if len(context_lines) > 0 else None
328
+
282
329
  except Exception as ex:
283
330
  logger.debug(f"Error extracting context from agent response : {str(ex)}")
284
- context = ""
331
+ context = None
332
+
333
+ context = context if context else NO_CONTEXT
334
+ return context
285
335
 
286
- return context if context else None
336
+ def _get_file_search_tool_call_ids(self, query_or_response):
337
+ """Return a list of tool_call_ids for file search tool calls."""
338
+ tool_calls = self._parse_tools_from_response(query_or_response)
339
+ return [tc.get("tool_call_id") for tc in tool_calls if tc.get("name") == "file_search"]
@@ -32,52 +32,53 @@ system:
32
32
 
33
33
  user:
34
34
  # Definition
35
- **Groundedness** refers to how well an answer is anchored in the provided context, evaluating its relevance, accuracy, and completeness based exclusively on that context. It assesses the extent to which the answer directly and fully addresses the question without introducing unrelated or incorrect information. The scale ranges from 1 to 5, with higher numbers indicating greater groundedness.
35
+ **Groundedness** refers to how well an answer is anchored in the provided context, evaluating its relevance, accuracy, and completeness based exclusively on that context. It assesses the extent to which the answer directly and fully addresses the question without introducing unrelated or incorrect information.
36
+
37
+ > Context is the source of truth for evaluating the response. If it's empty, rely on the tool results in the response and query.
38
+ > Evaluate the groundedness of the response message, not the chat history.
36
39
 
37
40
  # Ratings
38
41
  ## [Groundedness: 1] (Completely Unrelated Response)
39
- **Definition:** An answer that does not relate to the question or the context in any way. It fails to address the topic, provides irrelevant information, or introduces completely unrelated subjects.
42
+ **Definition:** An answer that does not relate to the question or the context in any way.
43
+ - Does not relate to the question or context at all.
44
+ - Talks about the general topic but does not respond to the query.
40
45
 
41
46
  **Examples:**
42
47
  **Context:** The company's annual meeting will be held next Thursday.
43
48
  **Query:** When is the company's annual meeting?
44
49
  **Response:** I enjoy hiking in the mountains during summer.
45
50
 
46
- **Context:** The new policy aims to reduce carbon emissions by 20% over the next five years.
47
- **Query:** What is the goal of the new policy?
48
- **Response:** My favorite color is blue.
49
-
50
- ## [Groundedness: 2] (Related Topic but Does Not Respond to the Query)
51
- **Definition:** An answer that relates to the general topic of the context but does not answer the specific question asked. It may mention concepts from the context but fails to provide a direct or relevant response.
52
-
53
- **Examples:**
54
51
  **Context:** The museum will exhibit modern art pieces from various local artists.
55
52
  **Query:** What kind of art will be exhibited at the museum?
56
53
  **Response:** Museums are important cultural institutions.
57
54
 
58
- **Context:** The new software update improves battery life and performance.
59
- **Query:** What does the new software update improve?
60
- **Response:** Software updates can sometimes fix bugs.
61
-
62
- ## [Groundedness: 3] (Attempts to Respond but Contains Incorrect Information)
63
- **Definition:** An answer that attempts to respond to the question but includes incorrect information not supported by the context. It may misstate facts, misinterpret the context, or provide erroneous details.
55
+ ## [Groundedness: 2] (Attempts to Respond but Contains Incorrect Information)
56
+ **Definition:** An answer that attempts to respond to the question but includes incorrect information not supported by the context. It may misstate facts, misinterpret the context, or provide erroneous details. Even if some points are correct, the presence of inaccuracies makes the response unreliable.
64
57
 
65
58
  **Examples:**
66
- **Context:** The festival starts on June 5th and features international musicians.
59
+ **Context:** - The festival starts on June 5th and features international musicians.
67
60
  **Query:** When does the festival start?
68
61
  **Response:** The festival starts on July 5th and features local artists.
69
62
 
70
- **Context:** The recipe requires two eggs and one cup of milk.
71
- **Query:** How many eggs are needed for the recipe?
72
- **Response:** You need three eggs for the recipe.
63
+ **Context:** bakery_menu.txt: - Croissant au Beurre flaky, buttery croissant
64
+ **Query:** [{"role":"user","content":"Are there croissants?"}]
65
+ **Response:** [{"role":"assistant","content":"Yes, Croissant au Beurre is on the menu, served with jam."}]
66
+
67
+ ## [Groundedness: 3] (Nothing to be Grounded)
68
+ **Definition:** An answer that does not provide any information that can be evaluated against the context. This includes responses that ask for clarification, offer polite fillers, or pose follow-up questions.
69
+
70
+ **Examples:**
71
+ **Context:**
72
+ **Query:** [{"role":"user","content":"How many eggs are needed for the recipe?"}, {"role":"tool","content":{"tool_result": [{"file_name": "recipe.txt", "content": "The recipe requires two eggs and one cup of milk."}]}}, {"role":"assistant","content":"You need three eggs for the recipe."}, {"role":"user","content":"Thank you."}]
73
+ **Response:** [{"role":"assistant","content":"You're welcome, anything else I can help with?"}]
73
74
 
74
75
  ## [Groundedness: 4] (Partially Correct Response)
75
76
  **Definition:** An answer that provides a correct response to the question but is incomplete or lacks specific details mentioned in the context. It captures some of the necessary information but omits key elements needed for a full understanding.
76
77
 
77
78
  **Examples:**
78
- **Context:** The bookstore offers a 15% discount to students and a 10% discount to senior citizens.
79
- **Query:** What discount does the bookstore offer to students?
80
- **Response:** Students get a discount at the bookstore.
79
+ **Context:** - store_details.txt: The bookstore offers a 15% discount to students and a 10% discount to senior citizens.
80
+ **Query:** [{"role":"user","content":"What discount does the bookstore offer to students, if any?"}]
81
+ **Response:** [{"role":"assistant","content":"Yes, students get a discount at the bookstore."}]
81
82
 
82
83
  **Context:** The company's headquarters are located in Berlin, Germany.
83
84
  **Query:** Where are the company's headquarters?
@@ -87,13 +88,13 @@ user:
87
88
  **Definition:** An answer that thoroughly and accurately responds to the question, including all relevant details from the context. It directly addresses the question with precise information, demonstrating complete understanding without adding extraneous information.
88
89
 
89
90
  **Examples:**
90
- **Context:** The author released her latest novel, 'The Silent Echo', on September 1st.
91
- **Query:** When was 'The Silent Echo' released?
92
- **Response:** 'The Silent Echo' was released on September 1st.
91
+ **CONTEXT:** The author released her latest novel, 'The Silent Echo', on September 1st.
92
+ **QUERY:** [{"role":"user","content":"When was 'The Silent Echo' released?"}]
93
+ **RESPONSE:** [{"role":"assistant","content":"'The Silent Echo' was released on September 1st."}]
93
94
 
94
- **Context:** Participants must register by May 31st to be eligible for early bird pricing.
95
+ **Context:**
95
96
  **Query:** By what date must participants register to receive early bird pricing?
96
- **Response:** Participants must register by May 31st to receive early bird pricing.
97
+ **Response:** [{"role":"tool","content":{"tool_result": [{"file_name": "store_guidelines.txt", "content": "Participants registering before and including May 31st will be eligible for early bird pricing."}]}}, {"role":"assistant","content":"Participants must register by May 31st to receive early bird pricing."}]
97
98
 
98
99
 
99
100
  # Data
@@ -103,7 +104,7 @@ RESPONSE: {{response}}
103
104
 
104
105
 
105
106
  # Tasks
106
- ## Please provide your assessment Score for the previous RESPONSE in relation to the CONTEXT and QUERY based on the Definitions above. Your output should include the following information:
107
+ ## Please provide your assessment Score for the previous RESPONSE message in relation to the CONTEXT, the QUERY, and any tool results contained in the RESPONSE, based on the Definitions above. Your output should include the following information:
107
108
  - **ThoughtChain**: To improve the reasoning process, think step by step and include a step-by-step explanation of your thought process as you analyze the data based on the definitions. Keep it brief and start your ThoughtChain with "Let's think step by step:".
108
109
  - **Explanation**: a very short explanation of why you think the input Data should get that Score.
109
110
  - **Score**: based on your previous analysis, provide your Score. The Score you give MUST be an integer score (i.e., "1", "2"...) based on the levels of the definitions.
@@ -344,8 +344,13 @@ class BatchEngine:
344
344
 
345
345
  func_params = inspect.signature(self._func).parameters
346
346
 
347
- filtered_params = {key: value for key, value in inputs.items() if key in func_params}
348
- return filtered_params
347
+ has_kwargs = any(p.kind == p.VAR_KEYWORD for p in func_params.values())
348
+
349
+ if has_kwargs:
350
+ return inputs
351
+ else:
352
+ filtered_params = {key: value for key, value in inputs.items() if key in func_params}
353
+ return filtered_params
349
354
 
350
355
  async def _exec_line_async(
351
356
  self,
@@ -3,4 +3,4 @@
3
3
  # ---------------------------------------------------------
4
4
  # represents upcoming version
5
5
 
6
- VERSION = "1.11.0"
6
+ VERSION = "1.11.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: azure-ai-evaluation
3
- Version: 1.11.0
3
+ Version: 1.11.2
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -28,13 +28,14 @@ Requires-Dist: nltk>=3.9.1
28
28
  Requires-Dist: azure-storage-blob>=12.10.0
29
29
  Requires-Dist: httpx>=0.25.1
30
30
  Requires-Dist: pandas<3.0.0,>=2.1.2
31
- Requires-Dist: openai>=1.78.0
31
+ Requires-Dist: openai>=1.108.0
32
32
  Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
33
33
  Requires-Dist: msrest>=0.6.21
34
34
  Requires-Dist: Jinja2>=3.1.6
35
35
  Requires-Dist: aiohttp>=3.0
36
36
  Provides-Extra: redteam
37
37
  Requires-Dist: pyrit==0.8.1; extra == "redteam"
38
+ Requires-Dist: duckdb==1.3.2; extra == "redteam"
38
39
  Dynamic: author
39
40
  Dynamic: author-email
40
41
  Dynamic: classifier
@@ -412,6 +413,17 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
412
413
 
413
414
  # Release History
414
415
 
416
+ ## 1.11.2 (2025-10-09)
417
+
418
+ ### Bugs Fixed
419
+
420
+ - Fixed an issue where `**kwargs` in an evaluator signature did not receive input columns that are not otherwise named in the evaluator's signature.
421
+
422
+ ## 1.11.1 (2025-09-17)
423
+
424
+ ### Bugs Fixed
425
+ - Pinning duckdb version to 1.3.2 for redteam extra to fix error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`
426
+
415
427
  ## 1.11.0 (2025-09-02)
416
428
 
417
429
  ### Features Added
@@ -5,7 +5,7 @@ nltk>=3.9.1
5
5
  azure-storage-blob>=12.10.0
6
6
  httpx>=0.25.1
7
7
  pandas<3.0.0,>=2.1.2
8
- openai>=1.78.0
8
+ openai>=1.108.0
9
9
  ruamel.yaml<1.0.0,>=0.17.10
10
10
  msrest>=0.6.21
11
11
  Jinja2>=3.1.6
@@ -13,3 +13,4 @@ aiohttp>=3.0
13
13
 
14
14
  [redteam]
15
15
  pyrit==0.8.1
16
+ duckdb==1.3.2
@@ -75,13 +75,13 @@ setup(
75
75
  "httpx>=0.25.1",
76
76
  # Dependencies added since Promptflow will soon be made optional
77
77
  "pandas>=2.1.2,<3.0.0",
78
- "openai>=1.78.0",
78
+ "openai>=1.108.0",
79
79
  "ruamel.yaml>=0.17.10,<1.0.0",
80
80
  "msrest>=0.6.21",
81
81
  "Jinja2>=3.1.6",
82
82
  "aiohttp>=3.0",
83
83
  ],
84
- extras_require={"redteam": ["pyrit==0.8.1"]},
84
+ extras_require={"redteam": ["pyrit==0.8.1", "duckdb==1.3.2"]},
85
85
  project_urls={
86
86
  "Bug Reports": "https://github.com/Azure/azure-sdk-for-python/issues",
87
87
  "Source": "https://github.com/Azure/azure-sdk-for-python",
@@ -1233,7 +1233,6 @@ class TestBuiltInEvaluators:
1233
1233
  "evaluator_cls",
1234
1234
  [
1235
1235
  RelevanceEvaluator,
1236
- GroundednessEvaluator,
1237
1236
  FluencyEvaluator,
1238
1237
  SimilarityEvaluator,
1239
1238
  CoherenceEvaluator,
@@ -20,6 +20,22 @@ from azure.ai.evaluation._evaluate._evaluate_aoai import (
20
20
  )
21
21
 
22
22
 
23
+ def _sampling_params_as_dict(value):
24
+ """Normalize sampling params to a plain dictionary for assertions."""
25
+
26
+ if value is None:
27
+ return {}
28
+ if isinstance(value, dict):
29
+ return value
30
+ if hasattr(value, "model_dump"):
31
+ return value.model_dump(exclude_none=True)
32
+ if hasattr(value, "dict"):
33
+ return value.dict(exclude_none=True)
34
+ if hasattr(value, "__dict__"):
35
+ return {k: v for k, v in vars(value).items() if v is not None and not k.startswith("_")}
36
+ return value
37
+
38
+
23
39
  @pytest.fixture
24
40
  def mock_aoai_model_config():
25
41
  """Mock Azure OpenAI model configuration for testing."""
@@ -200,8 +216,9 @@ class TestAzureOpenAIScoreModelGrader:
200
216
  assert config.input[0].role == "system"
201
217
  assert config.input[1].role == "user"
202
218
  assert config.range == [0.0, 1.0]
203
- assert config.sampling_params["temperature"] == 0.0
204
- assert config.sampling_params["max_tokens"] == 100
219
+ sampling_params = _sampling_params_as_dict(config.sampling_params)
220
+ assert sampling_params["temperature"] == 0.0
221
+ assert sampling_params["max_tokens"] == 100
205
222
  assert grader.pass_threshold == 0.5
206
223
 
207
224
  def test_different_score_ranges(self, mock_aoai_model_config):
@@ -534,7 +551,7 @@ class TestAzureOpenAIScoreModelGraderEdgeCases:
534
551
  sampling_params=complex_params,
535
552
  )
536
553
 
537
- assert grader._grader_config.sampling_params == complex_params
554
+ assert _sampling_params_as_dict(grader._grader_config.sampling_params) == complex_params
538
555
 
539
556
  def test_grader_with_unicode_content(self, mock_aoai_model_config):
540
557
  """Test grader with Unicode and special characters in content."""