azure-ai-evaluation 1.1.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (180) hide show
  1. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/CHANGELOG.md +72 -1
  2. {azure_ai_evaluation-1.1.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.3.0}/PKG-INFO +77 -7
  3. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/README.md +1 -1
  4. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/__init__.py +1 -15
  5. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_clients.py +24 -8
  6. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_models.py +2 -2
  7. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/utils.py +8 -8
  8. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_constants.py +21 -0
  9. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +2 -1
  10. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_eval_run.py +3 -1
  11. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_evaluate.py +74 -14
  12. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_utils.py +27 -0
  13. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +46 -25
  14. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  15. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +69 -4
  16. azure_ai_evaluation-1.3.0/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +61 -0
  17. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +7 -1
  18. azure_ai_evaluation-1.3.0/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  19. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +5 -42
  20. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +2 -0
  21. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +2 -0
  22. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +2 -0
  23. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +2 -0
  24. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +61 -68
  25. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +45 -23
  26. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +55 -34
  27. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +32 -27
  28. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +44 -23
  29. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +41 -81
  30. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_exceptions.py +0 -1
  31. azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +640 -0
  32. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_version.py +2 -1
  33. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +10 -3
  34. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
  35. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
  36. azure_ai_evaluation-1.3.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  37. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
  38. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_simulator.py +21 -13
  39. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0/azure_ai_evaluation.egg-info}/PKG-INFO +77 -7
  40. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/SOURCES.txt +7 -9
  41. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/requires.txt +2 -2
  42. azure_ai_evaluation-1.3.0/migration_guide.md +243 -0
  43. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/README.md +1 -1
  44. azure_ai_evaluation-1.3.0/samples/evaluation_samples_safety_evaluation.py +251 -0
  45. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_simulate.py +1 -1
  46. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/setup.py +4 -5
  47. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/conftest.py +25 -2
  48. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_adv_simulator.py +1 -2
  49. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_builtin_evaluators.py +0 -16
  50. azure_ai_evaluation-1.3.0/tests/e2etests/test_evaluate.py +501 -0
  51. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_lite_management_client.py +12 -3
  52. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_mass_evaluate.py +89 -91
  53. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_metrics_upload.py +11 -1
  54. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_sim_and_eval.py +8 -5
  55. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_built_in_evaluator.py +1 -1
  56. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate.py +206 -35
  57. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_performance.py +9 -13
  58. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +42 -0
  59. azure_ai_evaluation-1.3.0/tests/unittests/test_safety_evaluation.py +215 -0
  60. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_save_eval.py +6 -4
  61. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_callback_conv_bot.py +5 -4
  62. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  63. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  64. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  65. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  66. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  67. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  68. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  69. azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  70. azure_ai_evaluation-1.1.0/tests/__pf_service_isolation.py +0 -28
  71. azure_ai_evaluation-1.1.0/tests/e2etests/test_evaluate.py +0 -953
  72. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/MANIFEST.in +0 -0
  73. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/NOTICE.txt +0 -0
  74. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/TROUBLESHOOTING.md +0 -0
  75. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/__init__.py +0 -0
  76. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/__init__.py +0 -0
  77. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  78. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  79. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/__init__.py +0 -0
  80. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  81. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/constants.py +0 -0
  82. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/math.py +0 -0
  83. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/rai_service.py +0 -0
  84. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  85. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  86. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  87. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  88. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  89. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  90. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  91. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  92. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  93. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
  94. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  95. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
  96. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  97. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  98. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  99. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  100. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  101. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
  102. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  103. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  104. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  105. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -0
  106. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  107. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  108. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  109. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  110. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  111. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  112. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  113. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
  114. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  115. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  116. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
  117. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  118. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  119. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  120. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  121. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  122. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  123. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  124. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  125. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_http_utils.py +0 -0
  126. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  127. {azure_ai_evaluation-1.1.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
  128. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_user_agent.py +0 -0
  129. {azure_ai_evaluation-1.1.0/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
  130. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  131. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  132. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  133. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  134. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  135. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/py.typed +0 -0
  136. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  137. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  138. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  139. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  140. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  141. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  142. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  143. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  144. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  145. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
  146. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  147. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  148. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  149. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
  150. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  151. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  152. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  153. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  154. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_tracing.py +0 -0
  155. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  156. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  157. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  158. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  159. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/pyproject.toml +0 -0
  160. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/data/evaluate_test_data.jsonl +0 -0
  161. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_common.py +0 -0
  162. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_evaluate.py +0 -0
  163. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/setup.cfg +0 -0
  164. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/__init__.py +0 -0
  165. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/__openai_patcher.py +0 -0
  166. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/__init__.py +0 -0
  167. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  168. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/target_fn.py +0 -0
  169. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_batch_run_context.py +0 -0
  170. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  171. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
  172. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_eval_run.py +0 -0
  173. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_telemetry.py +0 -0
  174. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
  175. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  176. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  177. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  178. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_simulator.py +0 -0
  179. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  180. {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_utils.py +0 -0
@@ -1,10 +1,81 @@
1
1
  # Release History
2
2
 
3
- ## 1.1.0 (2024-12-12)
3
+ ## 1.3.0 (2025-02-28)
4
+
5
+ ### Breaking Changes
6
+ - Multimodal specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
7
+ - Metric name in ProtectedMaterialEvaluator's output is changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It's now consistent with other evaluators' metric names (ending with `_defect_rate`).
8
+
9
+ ## 1.2.0 (2025-01-27)
10
+
11
+ ### Features Added
12
+ - CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
13
+
14
+ ### Breaking Changes
15
+ - `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in the next release.
4
16
 
5
17
  ### Bugs Fixed
6
18
  - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
7
19
  - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
20
+ - Fixed the non adversarial simulator to run in task-free mode
21
+ - Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
22
+ main score when aggregating per-turn evaluations from a conversation into an overall
23
+ evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
24
+ - Fixed bug in non adversarial simulator sample where `tasks` was undefined
25
+
26
+ ### Other Changes
27
+ - Changed minimum required Python version to use this package from 3.8 to 3.9
28
+ - Stopped dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
29
+ - Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
30
+ environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
31
+
32
+ ## 1.1.0 (2024-12-12)
33
+
34
+ ### Features Added
35
+ - Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
36
+
37
+ ```python
38
+ evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
39
+ conversation = {
40
+ "messages": [
41
+ {
42
+ "role": "system",
43
+ "content": [
44
+ {"type": "text", "text": "You are an AI assistant that understands images."}
45
+ ],
46
+ },
47
+ {
48
+ "role": "user",
49
+ "content": [
50
+ {"type": "text", "text": "Can you describe this image?"},
51
+ {
52
+ "type": "image_url",
53
+ "image_url": {
54
+ "url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
55
+ },
56
+ },
57
+ ],
58
+ },
59
+ {
60
+ "role": "assistant",
61
+ "content": [
62
+ {
63
+ "type": "text",
64
+ "text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
65
+ }
66
+ ],
67
+ },
68
+ ]
69
+ }
70
+ print("Calling Content Safety Evaluator for multi-modal")
71
+ score = evaluator(conversation=conversation)
72
+ ```
73
+
74
+ - Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
75
+
76
+ ### Bugs Fixed
77
+ - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
78
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
8
79
 
9
80
  ## 1.0.1 (2024-11-15)
10
81
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.1.0
3
+ Version: 1.3.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -13,17 +13,16 @@ Classifier: Development Status :: 5 - Production/Stable
13
13
  Classifier: Programming Language :: Python
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Programming Language :: Python :: 3 :: Only
16
- Classifier: Programming Language :: Python :: 3.8
17
16
  Classifier: Programming Language :: Python :: 3.9
18
17
  Classifier: Programming Language :: Python :: 3.10
19
18
  Classifier: Programming Language :: Python :: 3.11
20
19
  Classifier: License :: OSI Approved :: MIT License
21
20
  Classifier: Operating System :: OS Independent
22
- Requires-Python: >=3.8
21
+ Requires-Python: >=3.9
23
22
  Description-Content-Type: text/markdown
24
23
  License-File: NOTICE.txt
25
- Requires-Dist: promptflow-devkit>=1.15.0
26
- Requires-Dist: promptflow-core>=1.15.0
24
+ Requires-Dist: promptflow-devkit>=1.17.1
25
+ Requires-Dist: promptflow-core>=1.17.1
27
26
  Requires-Dist: pyjwt>=2.8.0
28
27
  Requires-Dist: azure-identity>=1.16.0
29
28
  Requires-Dist: azure-core>=1.30.2
@@ -54,7 +53,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:
54
53
 
55
54
  ### Prerequisites
56
55
 
57
- - Python 3.8 or later is required to use this package.
56
+ - Python 3.9 or later is required to use this package.
58
57
  - [Optional] You must have [Azure AI Project][ai_project] or [Azure Open AI][azure_openai] to use AI-assisted evaluators
59
58
 
60
59
  ### Install the package
@@ -378,11 +377,82 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
378
377
 
379
378
  # Release History
380
379
 
381
- ## 1.1.0 (2024-12-12)
380
+ ## 1.3.0 (2025-02-28)
381
+
382
+ ### Breaking Changes
383
+ - Multimodal specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
384
+ - Metric name in ProtectedMaterialEvaluator's output is changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It's now consistent with other evaluators' metric names (ending with `_defect_rate`).
385
+
386
+ ## 1.2.0 (2025-01-27)
387
+
388
+ ### Features Added
389
+ - CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
390
+
391
+ ### Breaking Changes
392
+ - `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in the next release.
382
393
 
383
394
  ### Bugs Fixed
384
395
  - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
385
396
  - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
397
+ - Fixed the non adversarial simulator to run in task-free mode
398
+ - Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
399
+ main score when aggregating per-turn evaluations from a conversation into an overall
400
+ evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
401
+ - Fixed bug in non adversarial simulator sample where `tasks` was undefined
402
+
403
+ ### Other Changes
404
+ - Changed minimum required Python version to use this package from 3.8 to 3.9
405
+ - Stopped dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
406
+ - Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
407
+ environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
408
+
409
+ ## 1.1.0 (2024-12-12)
410
+
411
+ ### Features Added
412
+ - Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
413
+
414
+ ```python
415
+ evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
416
+ conversation = {
417
+ "messages": [
418
+ {
419
+ "role": "system",
420
+ "content": [
421
+ {"type": "text", "text": "You are an AI assistant that understands images."}
422
+ ],
423
+ },
424
+ {
425
+ "role": "user",
426
+ "content": [
427
+ {"type": "text", "text": "Can you describe this image?"},
428
+ {
429
+ "type": "image_url",
430
+ "image_url": {
431
+ "url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
432
+ },
433
+ },
434
+ ],
435
+ },
436
+ {
437
+ "role": "assistant",
438
+ "content": [
439
+ {
440
+ "type": "text",
441
+ "text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
442
+ }
443
+ ],
444
+ },
445
+ ]
446
+ }
447
+ print("Calling Content Safety Evaluator for multi-modal")
448
+ score = evaluator(conversation=conversation)
449
+ ```
450
+
451
+ - Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
452
+
453
+ ### Bugs Fixed
454
+ - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
455
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
386
456
 
387
457
  ## 1.0.1 (2024-11-15)
388
458
 
@@ -22,7 +22,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:
22
22
 
23
23
  ### Prerequisites
24
24
 
25
- - Python 3.8 or later is required to use this package.
25
+ - Python 3.9 or later is required to use this package.
26
26
  - [Optional] You must have [Azure AI Project][ai_project] or [Azure Open AI][azure_openai] to use AI-assisted evaluators
27
27
 
28
28
  ### Install the package
@@ -12,14 +12,6 @@ from ._evaluators._content_safety import (
12
12
  SexualEvaluator,
13
13
  ViolenceEvaluator,
14
14
  )
15
- from ._evaluators._multimodal._content_safety_multimodal import (
16
- ContentSafetyMultimodalEvaluator,
17
- HateUnfairnessMultimodalEvaluator,
18
- SelfHarmMultimodalEvaluator,
19
- SexualMultimodalEvaluator,
20
- ViolenceMultimodalEvaluator,
21
- )
22
- from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
23
15
  from ._evaluators._f1_score import F1ScoreEvaluator
24
16
  from ._evaluators._fluency import FluencyEvaluator
25
17
  from ._evaluators._gleu import GleuScoreEvaluator
@@ -72,11 +64,5 @@ __all__ = [
72
64
  "EvaluatorConfig",
73
65
  "Conversation",
74
66
  "Message",
75
- "EvaluationResult",
76
- "ContentSafetyMultimodalEvaluator",
77
- "HateUnfairnessMultimodalEvaluator",
78
- "SelfHarmMultimodalEvaluator",
79
- "SexualMultimodalEvaluator",
80
- "ViolenceMultimodalEvaluator",
81
- "ProtectedMaterialMultimodalEvaluator",
67
+ "EvaluationResult"
82
68
  ]
@@ -17,7 +17,7 @@ from azure.ai.evaluation.simulator._model_tools._identity_manager import TokenSc
17
17
  from ._models import BlobStoreInfo, Workspace
18
18
 
19
19
 
20
- API_VERSION: Final[str] = "2024-10-01"
20
+ API_VERSION: Final[str] = "2024-07-01-preview"
21
21
  QUERY_KEY_API_VERSION: Final[str] = "api-version"
22
22
  PATH_ML_WORKSPACES = ("providers", "Microsoft.MachineLearningServices", "workspaces")
23
23
 
@@ -69,7 +69,9 @@ class LiteMLClient:
69
69
  self._get_token_manager()
70
70
  return cast(TokenCredential, self._credential)
71
71
 
72
- def workspace_get_default_datastore(self, workspace_name: str, include_credentials: bool = False) -> BlobStoreInfo:
72
+ def workspace_get_default_datastore(
73
+ self, workspace_name: str, *, include_credentials: bool = False, **kwargs: Any
74
+ ) -> BlobStoreInfo:
73
75
  # 1. Get the default blob store
74
76
  # REST API documentation:
75
77
  # https://learn.microsoft.com/rest/api/azureml/datastores/list?view=rest-azureml-2024-10-01
@@ -92,18 +94,29 @@ class LiteMLClient:
92
94
  account_name = props_json["accountName"]
93
95
  endpoint = props_json["endpoint"]
94
96
  container_name = props_json["containerName"]
97
+ credential_type = props_json.get("credentials", {}).get("credentialsType")
95
98
 
96
99
  # 2. Get the SAS token to use for accessing the blob store
97
100
  # REST API documentation:
98
101
  # https://learn.microsoft.com/rest/api/azureml/datastores/list-secrets?view=rest-azureml-2024-10-01
99
- blob_store_credential: Optional[Union[AzureSasCredential, str]] = None
100
- if include_credentials:
102
+ blob_store_credential: Optional[Union[AzureSasCredential, TokenCredential, str]]
103
+ if not include_credentials:
104
+ blob_store_credential = None
105
+ elif credential_type and credential_type.lower() == "none":
106
+ # If storage account key access is disabled, and only Microsoft Entra ID authentication is available,
107
+ # the credentialsType will be "None" and we should not attempt to get the secrets.
108
+ blob_store_credential = self.get_credential()
109
+ else:
101
110
  url = self._generate_path(
102
111
  *PATH_ML_WORKSPACES, workspace_name, "datastores", "workspaceblobstore", "listSecrets"
103
112
  )
104
113
  secrets_response = self._http_client.request(
105
114
  method="POST",
106
115
  url=url,
116
+ json={
117
+ "expirableSecret": True,
118
+ "expireAfterHours": int(kwargs.get("key_expiration_hours", 1)),
119
+ },
107
120
  params={
108
121
  QUERY_KEY_API_VERSION: self._api_version,
109
122
  },
@@ -114,10 +127,13 @@ class LiteMLClient:
114
127
  secrets_json = secrets_response.json()
115
128
  secrets_type = secrets_json["secretsType"].lower()
116
129
 
130
+ # As per this website, only SAS tokens, access tokens, or Entra IDs are valid for accessing blob data
131
+ # stores:
132
+ # https://learn.microsoft.com/rest/api/storageservices/authorize-requests-to-azure-storage.
117
133
  if secrets_type == "sas":
118
134
  blob_store_credential = AzureSasCredential(secrets_json["sasToken"])
119
135
  elif secrets_type == "accountkey":
120
- # To support olders versions of azure-storage-blob better, we return a string here instead of
136
+ # To support older versions of azure-storage-blob better, we return a string here instead of
121
137
  # an AzureNamedKeyCredential
122
138
  blob_store_credential = secrets_json["key"]
123
139
  else:
@@ -164,19 +180,19 @@ class LiteMLClient:
164
180
  # nothing to see here, move along
165
181
  return
166
182
 
167
- additional_info: Optional[str] = None
183
+ message = f"The {description} request failed with HTTP {response.status_code}"
168
184
  try:
169
185
  error_json = response.json()["error"]
170
186
  additional_info = f"({error_json['code']}) {error_json['message']}"
187
+ message += f" - {additional_info}"
171
188
  except (JSONDecodeError, ValueError, KeyError):
172
189
  pass
173
190
 
174
191
  raise EvaluationException(
175
- message=f"The {description} request failed with HTTP {response.status_code}",
192
+ message=message,
176
193
  target=ErrorTarget.EVALUATE,
177
194
  category=ErrorCategory.FAILED_EXECUTION,
178
195
  blame=ErrorBlame.SYSTEM_ERROR,
179
- internal_message=additional_info,
180
196
  )
181
197
 
182
198
  def _generate_path(self, *paths: str) -> str:
@@ -8,7 +8,7 @@
8
8
 
9
9
  from typing import Dict, List, NamedTuple, Optional, Union
10
10
  from msrest.serialization import Model
11
- from azure.core.credentials import AzureSasCredential
11
+ from azure.core.credentials import AzureSasCredential, TokenCredential
12
12
 
13
13
 
14
14
  class BlobStoreInfo(NamedTuple):
@@ -16,7 +16,7 @@ class BlobStoreInfo(NamedTuple):
16
16
  account_name: str
17
17
  endpoint: str
18
18
  container_name: str
19
- credential: Optional[Union[AzureSasCredential, str]]
19
+ credential: Optional[Union[AzureSasCredential, TokenCredential, str]]
20
20
 
21
21
 
22
22
  class WorkspaceHubConfig(Model):
@@ -366,7 +366,7 @@ def validate_conversation(conversation):
366
366
  if not isinstance(messages, list):
367
367
  raise_exception(
368
368
  "'messages' parameter must be a JSON-compatible list of chat messages",
369
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
369
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
370
370
  )
371
371
  expected_roles = {"user", "assistant", "system"}
372
372
  image_found = False
@@ -393,7 +393,7 @@ def validate_conversation(conversation):
393
393
  ):
394
394
  raise_exception(
395
395
  f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
396
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
396
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
397
397
  )
398
398
  if isinstance(message, AssistantMessage):
399
399
  assistant_message_count += 1
@@ -407,7 +407,7 @@ def validate_conversation(conversation):
407
407
  if message.get("role") not in expected_roles:
408
408
  raise_exception(
409
409
  f"Invalid role provided: {message.get('role')}. Message number: {num}",
410
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
410
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
411
411
  )
412
412
  if message.get("role") == "assistant":
413
413
  assistant_message_count += 1
@@ -417,7 +417,7 @@ def validate_conversation(conversation):
417
417
  if not isinstance(content, (str, list)):
418
418
  raise_exception(
419
419
  f"Content in each turn must be a string or array. Message number: {num}",
420
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
420
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
421
421
  )
422
422
  if isinstance(content, list):
423
423
  if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
@@ -425,21 +425,21 @@ def validate_conversation(conversation):
425
425
  if not image_found:
426
426
  raise_exception(
427
427
  "Message needs to have multi-modal input like images.",
428
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
428
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
429
429
  )
430
430
  if assistant_message_count == 0:
431
431
  raise_exception(
432
432
  "Assistant role required in one of the messages.",
433
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
433
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
434
434
  )
435
435
  if user_message_count == 0:
436
436
  raise_exception(
437
437
  "User role required in one of the messages.",
438
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
438
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
439
439
  )
440
440
  if assistant_message_count > 1:
441
441
  raise_exception(
442
442
  "Evaluators for multimodal conversations only support single turn. "
443
443
  "User and assistant role expected as the only role in each message.",
444
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
444
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
445
445
  )
@@ -1,7 +1,9 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
+ import enum
4
5
  from typing import Literal
6
+ from azure.ai.evaluation._common._experimental import experimental
5
7
 
6
8
 
7
9
  class EvaluationMetrics:
@@ -20,6 +22,9 @@ class EvaluationMetrics:
20
22
  SELF_HARM = "self_harm"
21
23
  SEXUAL = "sexual"
22
24
  PROTECTED_MATERIAL = "protected_material"
25
+ ARTWORK = "artwork"
26
+ FICTIONAL_CHARACTERS = "fictional_characters"
27
+ LOGOS_AND_BRANDS = "logos_and_brands"
23
28
  XPIA = "xpia"
24
29
 
25
30
 
@@ -57,6 +62,22 @@ class EvaluationRunProperties:
57
62
  EVALUATION_SDK = "_azureml.evaluation_sdk_name"
58
63
 
59
64
 
65
+ @experimental
66
+ class _AggregationType(enum.Enum):
67
+ """Defines how numeric evaluation results should be aggregated
68
+ to produce a single value. Used by individual evaluators to combine per-turn results for
69
+ a conversation-based input. In general, wherever this enum is used, it is also possible
70
+ to directly assign the underlying aggregation function for more complex use cases.
71
+ The 'custom' value is generally not an acceptable input, and should only be used as an output
72
+ to indicate that a custom aggregation function has been injected."""
73
+
74
+ MEAN = "mean"
75
+ MAX = "max"
76
+ MIN = "min"
77
+ SUM = "sum"
78
+ CUSTOM = "custom"
79
+
80
+
60
81
  DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
61
82
 
62
83
  CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
@@ -5,5 +5,6 @@ from .eval_run_context import EvalRunContext
5
5
  from .code_client import CodeClient
6
6
  from .proxy_client import ProxyClient
7
7
  from .target_run_context import TargetRunContext
8
+ from .proxy_client import ProxyRun
8
9
 
9
- __all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
10
+ __all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext", "ProxyRun"]
@@ -421,7 +421,9 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
421
421
  local_paths.append(local_file_path)
422
422
 
423
423
  # We will write the artifacts to the workspaceblobstore
424
- datastore = self._management_client.workspace_get_default_datastore(self._workspace_name, True)
424
+ datastore = self._management_client.workspace_get_default_datastore(
425
+ self._workspace_name, include_credentials=True
426
+ )
425
427
  account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
426
428
 
427
429
  svc_client = BlobServiceClient(account_url=account_url, credential=datastore.credential)