azure-ai-evaluation 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (178) hide show
  1. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/CHANGELOG.md +6 -0
  2. {azure_ai_evaluation-1.2.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.3.0}/PKG-INFO +7 -1
  3. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/__init__.py +1 -15
  4. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/utils.py +8 -8
  5. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_constants.py +3 -0
  6. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_evaluate.py +5 -2
  7. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_exceptions.py +0 -1
  8. azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +640 -0
  9. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_version.py +2 -1
  10. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +10 -3
  11. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
  12. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
  13. azure_ai_evaluation-1.3.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  14. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
  15. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0/azure_ai_evaluation.egg-info}/PKG-INFO +7 -1
  16. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/SOURCES.txt +5 -8
  17. azure_ai_evaluation-1.3.0/migration_guide.md +243 -0
  18. azure_ai_evaluation-1.3.0/samples/evaluation_samples_safety_evaluation.py +251 -0
  19. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/setup.py +1 -1
  20. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/conftest.py +2 -1
  21. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_adv_simulator.py +1 -2
  22. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_builtin_evaluators.py +0 -16
  23. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_mass_evaluate.py +9 -36
  24. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_sim_and_eval.py +8 -5
  25. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate.py +17 -4
  26. azure_ai_evaluation-1.3.0/tests/unittests/test_safety_evaluation.py +215 -0
  27. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_callback_conv_bot.py +5 -4
  28. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  29. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  30. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  31. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  32. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  33. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  34. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  35. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  36. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/MANIFEST.in +0 -0
  37. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/NOTICE.txt +0 -0
  38. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/README.md +0 -0
  39. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/TROUBLESHOOTING.md +0 -0
  40. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/__init__.py +0 -0
  41. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/__init__.py +0 -0
  42. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  43. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
  44. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_models.py +0 -0
  45. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  46. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/__init__.py +0 -0
  47. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  48. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/constants.py +0 -0
  49. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/math.py +0 -0
  50. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/rai_service.py +0 -0
  51. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  52. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  53. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  54. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  55. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  56. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
  57. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
  58. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  59. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_utils.py +0 -0
  60. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  61. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  62. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
  63. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  64. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
  65. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  66. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  67. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -0
  68. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
  69. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
  70. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
  71. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  72. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  73. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
  74. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
  75. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
  76. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
  77. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
  78. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  79. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  80. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  81. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
  82. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  83. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
  84. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  85. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  86. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  87. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  88. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -0
  89. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  90. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  91. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  92. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  93. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  94. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  95. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  96. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
  97. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  98. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
  99. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  100. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  101. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
  102. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  103. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  104. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
  105. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  106. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  107. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  108. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
  109. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  110. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  111. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  112. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_http_utils.py +0 -0
  113. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  114. {azure_ai_evaluation-1.2.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
  115. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_user_agent.py +0 -0
  116. {azure_ai_evaluation-1.2.0/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
  117. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  118. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  119. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  120. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  121. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  122. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/py.typed +0 -0
  123. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  124. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  125. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  126. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  127. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  128. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  129. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  130. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  131. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  132. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
  133. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  134. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  135. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  136. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
  137. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  138. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  139. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  140. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  141. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_simulator.py +0 -0
  142. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_tracing.py +0 -0
  143. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  144. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  145. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  146. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/requires.txt +0 -0
  147. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  148. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/pyproject.toml +0 -0
  149. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/README.md +0 -0
  150. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/data/evaluate_test_data.jsonl +0 -0
  151. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_common.py +0 -0
  152. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_evaluate.py +0 -0
  153. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_simulate.py +0 -0
  154. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/setup.cfg +0 -0
  155. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/__init__.py +0 -0
  156. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/__openai_patcher.py +0 -0
  157. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/__init__.py +0 -0
  158. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  159. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/target_fn.py +0 -0
  160. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_evaluate.py +0 -0
  161. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_lite_management_client.py +0 -0
  162. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_metrics_upload.py +0 -0
  163. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_batch_run_context.py +0 -0
  164. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_built_in_evaluator.py +0 -0
  165. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  166. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
  167. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_eval_run.py +0 -0
  168. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_performance.py +0 -0
  169. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_telemetry.py +0 -0
  170. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
  171. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  172. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  173. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  174. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  175. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_save_eval.py +0 -0
  176. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_simulator.py +0 -0
  177. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  178. {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_utils.py +0 -0
@@ -1,5 +1,11 @@
1
1
  # Release History
2
2
 
3
+ ## 1.3.0 (2025-02-28)
4
+
5
+ ### Breaking Changes
6
+ - Multimodal-specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
7
+ - The metric name in `ProtectedMaterialEvaluator`'s output has changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It is now consistent with other evaluators' metric names (ending with `_defect_rate`).
8
+
3
9
  ## 1.2.0 (2025-01-27)
4
10
 
5
11
  ### Features Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -377,6 +377,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
377
377
 
378
378
  # Release History
379
379
 
380
+ ## 1.3.0 (2025-02-28)
381
+
382
+ ### Breaking Changes
383
+ - Multimodal-specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
384
+ - The metric name in `ProtectedMaterialEvaluator`'s output has changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It is now consistent with other evaluators' metric names (ending with `_defect_rate`).
385
+
380
386
  ## 1.2.0 (2025-01-27)
381
387
 
382
388
  ### Features Added
@@ -12,14 +12,6 @@ from ._evaluators._content_safety import (
12
12
  SexualEvaluator,
13
13
  ViolenceEvaluator,
14
14
  )
15
- from ._evaluators._multimodal._content_safety_multimodal import (
16
- ContentSafetyMultimodalEvaluator,
17
- HateUnfairnessMultimodalEvaluator,
18
- SelfHarmMultimodalEvaluator,
19
- SexualMultimodalEvaluator,
20
- ViolenceMultimodalEvaluator,
21
- )
22
- from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
23
15
  from ._evaluators._f1_score import F1ScoreEvaluator
24
16
  from ._evaluators._fluency import FluencyEvaluator
25
17
  from ._evaluators._gleu import GleuScoreEvaluator
@@ -72,11 +64,5 @@ __all__ = [
72
64
  "EvaluatorConfig",
73
65
  "Conversation",
74
66
  "Message",
75
- "EvaluationResult",
76
- "ContentSafetyMultimodalEvaluator",
77
- "HateUnfairnessMultimodalEvaluator",
78
- "SelfHarmMultimodalEvaluator",
79
- "SexualMultimodalEvaluator",
80
- "ViolenceMultimodalEvaluator",
81
- "ProtectedMaterialMultimodalEvaluator",
67
+ "EvaluationResult"
82
68
  ]
@@ -366,7 +366,7 @@ def validate_conversation(conversation):
366
366
  if not isinstance(messages, list):
367
367
  raise_exception(
368
368
  "'messages' parameter must be a JSON-compatible list of chat messages",
369
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
369
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
370
370
  )
371
371
  expected_roles = {"user", "assistant", "system"}
372
372
  image_found = False
@@ -393,7 +393,7 @@ def validate_conversation(conversation):
393
393
  ):
394
394
  raise_exception(
395
395
  f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
396
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
396
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
397
397
  )
398
398
  if isinstance(message, AssistantMessage):
399
399
  assistant_message_count += 1
@@ -407,7 +407,7 @@ def validate_conversation(conversation):
407
407
  if message.get("role") not in expected_roles:
408
408
  raise_exception(
409
409
  f"Invalid role provided: {message.get('role')}. Message number: {num}",
410
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
410
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
411
411
  )
412
412
  if message.get("role") == "assistant":
413
413
  assistant_message_count += 1
@@ -417,7 +417,7 @@ def validate_conversation(conversation):
417
417
  if not isinstance(content, (str, list)):
418
418
  raise_exception(
419
419
  f"Content in each turn must be a string or array. Message number: {num}",
420
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
420
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
421
421
  )
422
422
  if isinstance(content, list):
423
423
  if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
@@ -425,21 +425,21 @@ def validate_conversation(conversation):
425
425
  if not image_found:
426
426
  raise_exception(
427
427
  "Message needs to have multi-modal input like images.",
428
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
428
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
429
429
  )
430
430
  if assistant_message_count == 0:
431
431
  raise_exception(
432
432
  "Assistant role required in one of the messages.",
433
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
433
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
434
434
  )
435
435
  if user_message_count == 0:
436
436
  raise_exception(
437
437
  "User role required in one of the messages.",
438
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
438
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
439
439
  )
440
440
  if assistant_message_count > 1:
441
441
  raise_exception(
442
442
  "Evaluators for multimodal conversations only support single turn. "
443
443
  "User and assistant role expected as the only role in each message.",
444
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
444
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
445
445
  )
@@ -22,6 +22,9 @@ class EvaluationMetrics:
22
22
  SELF_HARM = "self_harm"
23
23
  SEXUAL = "sexual"
24
24
  PROTECTED_MATERIAL = "protected_material"
25
+ ARTWORK = "artwork"
26
+ FICTIONAL_CHARACTERS = "fictional_characters"
27
+ LOGOS_AND_BRANDS = "logos_and_brands"
25
28
  XPIA = "xpia"
26
29
 
27
30
 
@@ -147,6 +147,9 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[s
147
147
  """
148
148
  handled_metrics = [
149
149
  EvaluationMetrics.PROTECTED_MATERIAL,
150
+ EvaluationMetrics.FICTIONAL_CHARACTERS,
151
+ EvaluationMetrics.ARTWORK,
152
+ EvaluationMetrics.LOGOS_AND_BRANDS,
150
153
  _InternalEvaluationMetrics.ECI,
151
154
  EvaluationMetrics.XPIA,
152
155
  ]
@@ -525,7 +528,7 @@ def _process_column_mappings(
525
528
 
526
529
  processed_config: Dict[str, Dict[str, str]] = {}
527
530
 
528
- unexpected_references = re.compile(r"\${(?!target\.|data\.).+?}")
531
+ expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-Z_]+\}$")
529
532
 
530
533
  if column_mapping:
531
534
  for evaluator, mapping_config in column_mapping.items():
@@ -534,7 +537,7 @@ def _process_column_mappings(
534
537
 
535
538
  for map_to_key, map_value in mapping_config.items():
536
539
  # Check if there's any unexpected reference other than ${target.} or ${data.}
537
- if unexpected_references.search(map_value):
540
+ if not expected_references.search(map_value):
538
541
  msg = "Unexpected references detected in 'column_mapping'. Ensure only ${target.} and ${data.} are used."
539
542
  raise EvaluationException(
540
543
  message=msg,
@@ -63,7 +63,6 @@ class ErrorTarget(Enum):
63
63
  RAI_CLIENT = "RAIClient"
64
64
  COHERENCE_EVALUATOR = "CoherenceEvaluator"
65
65
  CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
66
- CONTENT_SAFETY_MULTIMODAL_EVALUATOR = "ContentSafetyMultimodalEvaluator"
67
66
  ECI_EVALUATOR = "ECIEvaluator"
68
67
  F1_EVALUATOR = "F1Evaluator"
69
68
  GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"