azure-ai-evaluation 1.0.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (177) hide show
  1. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/CHANGELOG.md +73 -1
  2. {azure_ai_evaluation-1.0.1/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.2.0}/PKG-INFO +84 -15
  3. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/README.md +7 -7
  4. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/TROUBLESHOOTING.md +5 -1
  5. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_azure/_clients.py +204 -0
  6. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_azure/_models.py +227 -0
  7. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_azure/_token_manager.py +118 -0
  8. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_common/rai_service.py +30 -21
  9. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_constants.py +19 -0
  10. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +2 -1
  11. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
  12. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_eval_run.py +16 -43
  13. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_evaluate.py +76 -44
  14. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_utils.py +93 -34
  15. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +46 -25
  16. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  17. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +140 -5
  18. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +61 -0
  19. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -1
  20. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +40 -2
  21. azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  22. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +6 -43
  23. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +2 -0
  24. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +2 -0
  25. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +2 -0
  26. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +2 -0
  27. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +61 -68
  28. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +45 -23
  29. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +55 -34
  30. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +32 -27
  31. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +44 -23
  32. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +42 -82
  33. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_http_utils.py +6 -4
  34. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  35. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  36. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  37. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_version.py +1 -1
  38. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
  39. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +35 -16
  40. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +128 -7
  41. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -1
  42. azure_ai_evaluation-1.2.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  43. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -0
  44. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +40 -0
  45. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -0
  46. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_simulator.py +24 -13
  47. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0/azure_ai_evaluation.egg-info}/PKG-INFO +84 -15
  48. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure_ai_evaluation.egg-info/SOURCES.txt +10 -1
  49. azure_ai_evaluation-1.2.0/azure_ai_evaluation.egg-info/requires.txt +7 -0
  50. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/samples/README.md +1 -1
  51. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/samples/evaluation_samples_simulate.py +1 -1
  52. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/setup.py +4 -9
  53. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/conftest.py +99 -32
  54. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/e2etests/test_adv_simulator.py +222 -4
  55. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/e2etests/test_builtin_evaluators.py +103 -45
  56. azure_ai_evaluation-1.2.0/tests/e2etests/test_evaluate.py +501 -0
  57. azure_ai_evaluation-1.2.0/tests/e2etests/test_lite_management_client.py +81 -0
  58. azure_ai_evaluation-1.2.0/tests/e2etests/test_mass_evaluate.py +406 -0
  59. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/e2etests/test_metrics_upload.py +40 -19
  60. azure_ai_evaluation-1.2.0/tests/e2etests/test_sim_and_eval.py +398 -0
  61. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_batch_run_context.py +2 -2
  62. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_built_in_evaluator.py +1 -1
  63. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_content_safety_rai_script.py +2 -0
  64. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_eval_run.py +23 -23
  65. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_evaluate.py +189 -31
  66. azure_ai_evaluation-1.2.0/tests/unittests/test_evaluate_performance.py +78 -0
  67. azure_ai_evaluation-1.2.0/tests/unittests/test_evaluators/slow_eval.py +34 -0
  68. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +42 -0
  69. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_save_eval.py +6 -4
  70. azure_ai_evaluation-1.0.1/azure_ai_evaluation.egg-info/requires.txt +0 -9
  71. azure_ai_evaluation-1.0.1/tests/__pf_service_isolation.py +0 -28
  72. azure_ai_evaluation-1.0.1/tests/e2etests/test_evaluate.py +0 -926
  73. azure_ai_evaluation-1.0.1/tests/e2etests/test_sim_and_eval.py +0 -129
  74. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/MANIFEST.in +0 -0
  75. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/NOTICE.txt +0 -0
  76. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/__init__.py +0 -0
  77. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/__init__.py +0 -0
  78. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/__init__.py +0 -0
  79. {azure_ai_evaluation-1.0.1/azure/ai/evaluation/_evaluate → azure_ai_evaluation-1.2.0/azure/ai/evaluation/_azure}/__init__.py +0 -0
  80. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_common/__init__.py +0 -0
  81. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  82. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_common/constants.py +0 -0
  83. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_common/math.py +0 -0
  84. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_common/utils.py +0 -0
  85. {azure_ai_evaluation-1.0.1/azure/ai/evaluation/_evaluators → azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluate}/__init__.py +0 -0
  86. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
  87. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
  88. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
  89. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
  90. {azure_ai_evaluation-1.0.1/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators}/__init__.py +0 -0
  91. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  92. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  93. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
  94. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  95. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  96. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  97. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  98. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  99. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  100. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
  101. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  102. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  103. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  104. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -0
  105. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  106. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  107. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  108. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -0
  109. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -0
  110. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -0
  111. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -0
  112. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -0
  113. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -0
  114. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -0
  115. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -0
  116. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  117. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  118. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  119. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  120. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
  121. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  122. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  123. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
  124. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  125. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  126. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  127. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  128. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  129. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  130. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  131. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  132. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_exceptions.py +0 -0
  133. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  134. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_user_agent.py +0 -0
  135. {azure_ai_evaluation-1.0.1/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.2.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
  136. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  137. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  138. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/py.typed +0 -0
  139. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  140. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  141. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  142. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  143. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  144. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  145. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  146. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  147. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  148. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  149. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +0 -0
  150. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  151. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  152. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  153. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  154. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_tracing.py +0 -0
  155. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  156. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  157. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  158. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  159. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/pyproject.toml +0 -0
  160. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/samples/data/evaluate_test_data.jsonl +0 -0
  161. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/samples/evaluation_samples_common.py +0 -0
  162. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/samples/evaluation_samples_evaluate.py +0 -0
  163. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/setup.cfg +0 -0
  164. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/__init__.py +0 -0
  165. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/__openai_patcher.py +0 -0
  166. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/e2etests/__init__.py +0 -0
  167. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  168. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/e2etests/target_fn.py +0 -0
  169. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  170. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_evaluate_telemetry.py +0 -0
  171. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
  172. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  173. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  174. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_simulator.py +0 -0
  175. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  176. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  177. {azure_ai_evaluation-1.0.1 → azure_ai_evaluation-1.2.0}/tests/unittests/test_utils.py +0 -0
@@ -1,10 +1,81 @@
1
1
  # Release History
2
2
 
3
+ ## 1.2.0 (2025-01-27)
4
+
5
+ ### Features Added
6
+ - CSV files are now supported as data file inputs with the `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
7
+
8
+ ### Breaking Changes
9
+ - `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in next release.
10
+
11
+ ### Bugs Fixed
12
+ - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
13
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
14
+ - Fixed the non adversarial simulator to run in task-free mode
15
+ - Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
16
+ main score when aggregating per-turn evaluations from a conversation into an overall
17
+ evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
18
+ - Fixed bug in non adversarial simulator sample where `tasks` was undefined
19
+
20
+ ### Other Changes
21
+ - Changed minimum required Python version to use this package from 3.8 to 3.9
22
+ - Stop dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
23
+ - Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
24
+ environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
25
+
26
+ ## 1.1.0 (2024-12-12)
27
+
28
+ ### Features Added
29
+ - Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
30
+
31
+ ```python
32
+ evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
33
+ conversation = {
34
+ "messages": [
35
+ {
36
+ "role": "system",
37
+ "content": [
38
+ {"type": "text", "text": "You are an AI assistant that understands images."}
39
+ ],
40
+ },
41
+ {
42
+ "role": "user",
43
+ "content": [
44
+ {"type": "text", "text": "Can you describe this image?"},
45
+ {
46
+ "type": "image_url",
47
+ "image_url": {
48
+ "url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
49
+ },
50
+ },
51
+ ],
52
+ },
53
+ {
54
+ "role": "assistant",
55
+ "content": [
56
+ {
57
+ "type": "text",
58
+ "text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
59
+ }
60
+ ],
61
+ },
62
+ ]
63
+ }
64
+ print("Calling Content Safety Evaluator for multi-modal")
65
+ score = evaluator(conversation=conversation)
66
+ ```
67
+
68
+ - Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
69
+
70
+ ### Bugs Fixed
71
+ - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
72
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
73
+
3
74
  ## 1.0.1 (2024-11-15)
4
75
 
5
76
  ### Bugs Fixed
6
- - Fixed `[remote]` extra to be needed only when tracking results in Azure AI Studio.
7
77
  - Removing `azure-ai-inference` as dependency.
78
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
8
79
 
9
80
  ## 1.0.0 (2024-11-13)
10
81
 
@@ -16,6 +87,7 @@
16
87
  - Fixed an issue where the `output_path` parameter in the `evaluate` API did not support relative path.
17
88
  - Output of adversarial simulators are of type `JsonLineList` and the helper function `to_eval_qr_json_lines` now outputs context from both user and assistant turns along with `category` if it exists in the conversation
18
89
  - Fixed an issue where during long-running simulations, API token expires causing "Forbidden" error. Instead, users can now set an environment variable `AZURE_TOKEN_REFRESH_INTERVAL` to refresh the token more frequently to prevent expiration and ensure continuous operation of the simulation.
90
+ - Fixed an issue with the `ContentSafetyEvaluator` that caused parallel execution of sub-evaluators to fail. Parallel execution is now enabled by default again, but can still be disabled via the '_parallel' boolean keyword argument during class initialization.
19
91
  - Fix `evaluate` function not producing aggregated metrics if ANY values to be aggregated were None, NaN, or
20
92
  otherwise difficult to process. Such values are ignored fully, so the aggregated metric of `[1, 2, 3, NaN]`
21
93
  would be 2, not 1.5.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.0.1
3
+ Version: 1.2.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -13,23 +13,21 @@ Classifier: Development Status :: 5 - Production/Stable
13
13
  Classifier: Programming Language :: Python
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Programming Language :: Python :: 3 :: Only
16
- Classifier: Programming Language :: Python :: 3.8
17
16
  Classifier: Programming Language :: Python :: 3.9
18
17
  Classifier: Programming Language :: Python :: 3.10
19
18
  Classifier: Programming Language :: Python :: 3.11
20
19
  Classifier: License :: OSI Approved :: MIT License
21
20
  Classifier: Operating System :: OS Independent
22
- Requires-Python: >=3.8
21
+ Requires-Python: >=3.9
23
22
  Description-Content-Type: text/markdown
24
23
  License-File: NOTICE.txt
25
- Requires-Dist: promptflow-devkit>=1.15.0
26
- Requires-Dist: promptflow-core>=1.15.0
24
+ Requires-Dist: promptflow-devkit>=1.17.1
25
+ Requires-Dist: promptflow-core>=1.17.1
27
26
  Requires-Dist: pyjwt>=2.8.0
28
27
  Requires-Dist: azure-identity>=1.16.0
29
28
  Requires-Dist: azure-core>=1.30.2
30
29
  Requires-Dist: nltk>=3.9.1
31
- Provides-Extra: remote
32
- Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "remote"
30
+ Requires-Dist: azure-storage-blob>=12.10.0
33
31
 
34
32
  # Azure AI Evaluation client library for Python
35
33
 
@@ -55,7 +53,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:
55
53
 
56
54
  ### Prerequisites
57
55
 
58
- - Python 3.8 or later is required to use this package.
56
+ - Python 3.9 or later is required to use this package.
59
57
  - [Optional] You must have [Azure AI Project][ai_project] or [Azure Open AI][azure_openai] to use AI-assisted evaluators
60
58
 
61
59
  ### Install the package
@@ -359,13 +357,13 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
359
357
  [evaluate_dataset]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#evaluate-on-test-dataset-using-evaluate
360
358
  [evaluators]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
361
359
  [evaluate_api]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview#azure-ai-evaluation-evaluate
362
- [evaluate_app]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/evaluate_app
360
+ [evaluate_app]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_App_Endpoint
363
361
  [evaluation_tsg]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
364
362
  [ai_studio]: https://learn.microsoft.com/azure/ai-studio/what-is-ai-studio
365
363
  [ai_project]: https://learn.microsoft.com/azure/ai-studio/how-to/create-projects?tabs=ai-studio
366
364
  [azure_openai]: https://learn.microsoft.com/azure/ai-services/openai/
367
- [evaluate_models]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/evaluate_endpoints
368
- [custom_evaluators]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/evaluate_custom
365
+ [evaluate_models]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint
366
+ [custom_evaluators]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators
369
367
  [evaluate_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate
370
368
  [evaluation_metrics]: https://learn.microsoft.com/azure/ai-studio/concepts/evaluation-metrics-built-in
371
369
  [performance_and_quality_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#performance-and-quality-evaluators
@@ -373,18 +371,88 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
373
371
  [composite_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#composite-evaluators
374
372
  [adversarial_simulation_docs]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#generate-adversarial-simulations-for-safety-evaluation
375
373
  [adversarial_simulation_scenarios]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#supported-adversarial-simulation-scenarios
376
- [adversarial_simulation]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_adversarial
377
- [simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_conversation_starter
374
+ [adversarial_simulation]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Simulators/Simulate_Adversarial_Data
375
+ [simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter
378
376
  [adversarial_jailbreak]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#simulating-jailbreak-attacks
379
377
 
380
-
381
378
  # Release History
382
379
 
380
+ ## 1.2.0 (2025-01-27)
381
+
382
+ ### Features Added
383
+ - CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
384
+
385
+ ### Breaking Changes
386
+ - `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in next release.
387
+
388
+ ### Bugs Fixed
389
+ - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
390
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
391
+ - Fixed the non-adversarial simulator to run in task-free mode
392
+ - Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
393
+ main score when aggregating per-turn evaluations from a conversation into an overall
394
+ evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
395
+ - Fixed bug in non-adversarial simulator sample where `tasks` was undefined
396
+
397
+ ### Other Changes
398
+ - Changed the minimum required Python version to use this package from 3.8 to 3.9
399
+ - Stop dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
400
+ - Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
401
+ environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
402
+
403
+ ## 1.1.0 (2024-12-12)
404
+
405
+ ### Features Added
406
+ - Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
407
+
408
+ ```python
409
+ evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
410
+ conversation = {
411
+ "messages": [
412
+ {
413
+ "role": "system",
414
+ "content": [
415
+ {"type": "text", "text": "You are an AI assistant that understands images."}
416
+ ],
417
+ },
418
+ {
419
+ "role": "user",
420
+ "content": [
421
+ {"type": "text", "text": "Can you describe this image?"},
422
+ {
423
+ "type": "image_url",
424
+ "image_url": {
425
+ "url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
426
+ },
427
+ },
428
+ ],
429
+ },
430
+ {
431
+ "role": "assistant",
432
+ "content": [
433
+ {
434
+ "type": "text",
435
+ "text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
436
+ }
437
+ ],
438
+ },
439
+ ]
440
+ }
441
+ print("Calling Content Safety Evaluator for multi-modal")
442
+ score = evaluator(conversation=conversation)
443
+ ```
444
+
445
+ - Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
446
+
447
+ ### Bugs Fixed
448
+ - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
449
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
450
+
383
451
  ## 1.0.1 (2024-11-15)
384
452
 
385
453
  ### Bugs Fixed
386
- - Fixed `[remote]` extra to be needed only when tracking results in Azure AI Studio.
387
454
  - Removing `azure-ai-inference` as dependency.
455
+ - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
388
456
 
389
457
  ## 1.0.0 (2024-11-13)
390
458
 
@@ -396,6 +464,7 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
396
464
  - Fixed an issue where the `output_path` parameter in the `evaluate` API did not support relative path.
397
465
  - Output of adversarial simulators are of type `JsonLineList` and the helper function `to_eval_qr_json_lines` now outputs context from both user and assistant turns along with `category` if it exists in the conversation
398
466
  - Fixed an issue where during long-running simulations, API token expires causing "Forbidden" error. Instead, users can now set an environment variable `AZURE_TOKEN_REFRESH_INTERVAL` to refresh the token more frequently to prevent expiration and ensure continuous operation of the simulation.
467
+ - Fixed an issue with the `ContentSafetyEvaluator` that caused parallel execution of sub-evaluators to fail. Parallel execution is now enabled by default again, but can still be disabled via the '_parallel' boolean keyword argument during class initialization.
399
468
  - Fix `evaluate` function not producing aggregated metrics if ANY values to be aggregated were None, NaN, or
400
469
  otherwise difficult to process. Such values are ignored fully, so the aggregated metric of `[1, 2, 3, NaN]`
401
470
  would be 2, not 1.5.
@@ -22,7 +22,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:
22
22
 
23
23
  ### Prerequisites
24
24
 
25
- - Python 3.8 or later is required to use this package.
25
+ - Python 3.9 or later is required to use this package.
26
26
  - [Optional] You must have [Azure AI Project][ai_project] or [Azure Open AI][azure_openai] to use AI-assisted evaluators
27
27
 
28
28
  ### Install the package
@@ -326,13 +326,13 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
326
326
  [evaluate_dataset]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#evaluate-on-test-dataset-using-evaluate
327
327
  [evaluators]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
328
328
  [evaluate_api]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview#azure-ai-evaluation-evaluate
329
- [evaluate_app]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/evaluate_app
329
+ [evaluate_app]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_App_Endpoint
330
330
  [evaluation_tsg]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
331
331
  [ai_studio]: https://learn.microsoft.com/azure/ai-studio/what-is-ai-studio
332
332
  [ai_project]: https://learn.microsoft.com/azure/ai-studio/how-to/create-projects?tabs=ai-studio
333
333
  [azure_openai]: https://learn.microsoft.com/azure/ai-services/openai/
334
- [evaluate_models]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/evaluate_endpoints
335
- [custom_evaluators]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/evaluate_custom
334
+ [evaluate_models]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint
335
+ [custom_evaluators]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators
336
336
  [evaluate_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate
337
337
  [evaluation_metrics]: https://learn.microsoft.com/azure/ai-studio/concepts/evaluation-metrics-built-in
338
338
  [performance_and_quality_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#performance-and-quality-evaluators
@@ -340,6 +340,6 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
340
340
  [composite_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#composite-evaluators
341
341
  [adversarial_simulation_docs]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#generate-adversarial-simulations-for-safety-evaluation
342
342
  [adversarial_simulation_scenarios]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#supported-adversarial-simulation-scenarios
343
- [adversarial_simulation]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_adversarial
344
- [simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_conversation_starter
345
- [adversarial_jailbreak]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#simulating-jailbreak-attacks
343
+ [adversarial_simulation]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Simulators/Simulate_Adversarial_Data
344
+ [simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter
345
+ [adversarial_jailbreak]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#simulating-jailbreak-attacks
@@ -26,6 +26,10 @@ This guide walks you through how to investigate failures, common errors in the `
26
26
  - Ensure that you assign the proper permissions to the storage account linked to your Azure AI Studio hub. This can be done with the following command. More information can be found [here](https://aka.ms/credentialleshub).
27
27
 
28
28
  ```Shell
29
+ # <mySubscriptionID>: Subscription ID of the Azure AI Studio hub's linked storage account (available in Azure AI hub resource view in Azure Portal).
30
+ # <myResourceGroupName>: Resource group of the Azure AI Studio hub's linked storage account.
31
+ # <user-id>: User object ID for role assignment (retrieve with "az ad user show" command).
32
+
29
33
  az role assignment create --role "Storage Blob Data Contributor" --scope /subscriptions/<mySubscriptionID>/resourceGroups/<myResourceGroupName> --assignee-principal-type User --assignee-object-id "<user-id>"
30
34
  ```
31
35
 
@@ -50,7 +54,7 @@ The Adversarial simulator does not support selecting individual harms, instead w
50
54
  ### Simulator is slow
51
55
 
52
56
  Identify the type of simulations being run (adversarial or non-adversarial).
53
- Adjust parameters such as `api_call_retry_sleep_sec`, `api_call_delay_sec`, and `concurrent_async_task`. Please note that rate limits to llm calls can be both tokens per minute and requests per minute.
57
+ Adjust parameters such as `api_call_retry_sleep_sec`, `api_call_delay_sec`, and `concurrent_async_task`. Please note that rate limits to llm calls can be both tokens per minute and requests per minute.
54
58
 
55
59
  ## Logging
56
60
 
@@ -0,0 +1,204 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from logging import Logger
6
+ from typing import Any, Dict, Final, Optional, Set, Union, cast
7
+ from threading import Lock
8
+ from urllib.parse import quote
9
+ from json.decoder import JSONDecodeError
10
+
11
+ from azure.core.credentials import TokenCredential, AzureSasCredential
12
+ from azure.core.rest import HttpResponse
13
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
14
+ from azure.ai.evaluation._http_utils import HttpPipeline, get_http_client
15
+ from azure.ai.evaluation._azure._token_manager import AzureMLTokenManager
16
+ from azure.ai.evaluation.simulator._model_tools._identity_manager import TokenScope
17
+ from ._models import BlobStoreInfo, Workspace
18
+
19
+
20
+ API_VERSION: Final[str] = "2024-07-01-preview"
21
+ QUERY_KEY_API_VERSION: Final[str] = "api-version"
22
+ PATH_ML_WORKSPACES = ("providers", "Microsoft.MachineLearningServices", "workspaces")
23
+
24
+
25
class LiteMLClient:
    """A lightweight Azure ML management-plane API client.

    Wraps only the bare REST endpoints needed by the evaluation SDK (workspace
    lookups and default-datastore/secret retrieval) without depending on the
    full azure-ai-ml package.

    :param subscription_id: Azure subscription ID
    :type subscription_id: str
    :param resource_group: Azure resource group name
    :type resource_group: str
    :param logger: Logger object
    :type logger: logging.Logger
    :keyword credential: Azure credentials. If omitted, one is resolved lazily
        by the token manager on first use.
    :paramtype credential: TokenCredential
    :keyword kwargs: Additional keyword arguments forwarded to the HTTP client
    :paramtype kwargs: Dict
    :keyword str api_version: The API version. Default is 2024-07-01-preview
        (the ``API_VERSION`` module constant).
    """

    def __init__(
        self,
        subscription_id: str,
        resource_group: str,
        logger: Logger,
        credential: Optional[TokenCredential] = None,
        **kwargs: Any,
    ) -> None:
        # URL-encode the path segments so IDs containing reserved characters
        # cannot alter the request path.
        subscription_id = quote(subscription_id, safe="")
        resource_group = quote(resource_group, safe="")

        self._base_url: Final[str] = (
            f"https://management.azure.com/subscriptions/{subscription_id}/resourceGroups/{resource_group}"
        )
        self._logger: Final[Logger] = logger
        self._api_version: Final[str] = kwargs.get("api_version", API_VERSION)
        self._http_client: Final[HttpPipeline] = get_http_client(**kwargs)
        self._lock: Final[Lock] = Lock()

        # Mutable state guarded by self._lock (see _get_token_manager)
        self._token_manager: Optional[AzureMLTokenManager] = None
        self._credential: Optional[TokenCredential] = credential

    def get_token(self) -> str:
        """Return a bearer token for the Azure management scope."""
        return self._get_token_manager().get_token()

    def get_credential(self) -> TokenCredential:
        """Return the credential in use, resolving one lazily if necessary."""
        # Initializing the token manager populates self._credential as a side effect
        self._get_token_manager()
        return cast(TokenCredential, self._credential)

    def workspace_get_default_datastore(
        self, workspace_name: str, *, include_credentials: bool = False, **kwargs: Any
    ) -> BlobStoreInfo:
        """Return the workspace's default blob datastore, optionally with credentials.

        :param workspace_name: Name of the Azure ML workspace.
        :type workspace_name: str
        :keyword include_credentials: When True, also fetch a credential (SAS
            token, account key, or Entra ID credential) for the blob store.
        :paramtype include_credentials: bool
        :keyword int key_expiration_hours: Lifetime of a requested expirable
            secret. Default is 1.
        :return: Information about the default blob store.
        :rtype: BlobStoreInfo
        :raises EvaluationException: If a request fails, no default datastore
            exists, or the store uses an unrecognized credential type.
        """
        # 1. Get the default blob store
        # REST API documentation:
        # https://learn.microsoft.com/rest/api/azureml/datastores/list?view=rest-azureml-2024-10-01
        url = self._generate_path(*PATH_ML_WORKSPACES, workspace_name, "datastores")
        headers = self._get_headers()

        stores_response = self._http_client.request(
            method="GET",
            url=url,
            params={QUERY_KEY_API_VERSION: self._api_version, "isDefault": True, "count": 1, "orderByAsc": "false"},
            headers=headers,
        )
        self._throw_on_http_error(stores_response, "list default workspace datastore")

        datastores = stores_response.json().get("value") or []
        if not datastores:
            # Fail with a clear error rather than an IndexError on an empty list
            raise EvaluationException(
                message=f"No default datastore was found for workspace '{workspace_name}'.",
                target=ErrorTarget.EVALUATE,
                category=ErrorCategory.FAILED_EXECUTION,
                blame=ErrorBlame.SYSTEM_ERROR,
            )

        json = datastores[0]
        props_json = json["properties"]
        name = json["name"]
        account_name = props_json["accountName"]
        endpoint = props_json["endpoint"]
        container_name = props_json["containerName"]
        credential_type = props_json.get("credentials", {}).get("credentialsType")

        # 2. Get the SAS token to use for accessing the blob store
        # REST API documentation:
        # https://learn.microsoft.com/rest/api/azureml/datastores/list-secrets?view=rest-azureml-2024-10-01
        blob_store_credential: Optional[Union[AzureSasCredential, TokenCredential, str]]
        if not include_credentials:
            blob_store_credential = None
        elif credential_type and credential_type.lower() == "none":
            # If storage account key access is disabled, and only Microsoft Entra ID authentication is available,
            # the credentialsType will be "None" and we should not attempt to get the secrets.
            blob_store_credential = self.get_credential()
        else:
            # Request secrets for the datastore actually returned by the list
            # call above, rather than assuming it is named "workspaceblobstore".
            url = self._generate_path(*PATH_ML_WORKSPACES, workspace_name, "datastores", name, "listSecrets")
            secrets_response = self._http_client.request(
                method="POST",
                url=url,
                json={
                    "expirableSecret": True,
                    "expireAfterHours": int(kwargs.get("key_expiration_hours", 1)),
                },
                params={
                    QUERY_KEY_API_VERSION: self._api_version,
                },
                headers=headers,
            )
            self._throw_on_http_error(secrets_response, "workspace datastore secrets")

            secrets_json = secrets_response.json()
            secrets_type = secrets_json["secretsType"].lower()

            # As per this website, only SAS tokens, access tokens, or Entra IDs are valid for accessing blob data
            # stores:
            # https://learn.microsoft.com/rest/api/storageservices/authorize-requests-to-azure-storage.
            if secrets_type == "sas":
                blob_store_credential = AzureSasCredential(secrets_json["sasToken"])
            elif secrets_type == "accountkey":
                # To support older versions of azure-storage-blob better, we return a string here instead of
                # an AzureNamedKeyCredential
                blob_store_credential = secrets_json["key"]
            else:
                raise EvaluationException(
                    message=f"The '{account_name}' blob store does not use a recognized credential type.",
                    internal_message=f"The credential type is '{secrets_type}'",
                    target=ErrorTarget.EVALUATE,
                    category=ErrorCategory.INVALID_VALUE,
                    blame=ErrorBlame.SYSTEM_ERROR,
                )

        return BlobStoreInfo(name, account_name, endpoint, container_name, blob_store_credential)

    def workspace_get_info(self, workspace_name: str) -> Workspace:
        """Fetch and deserialize the workspace resource.

        REST API documentation:
        https://learn.microsoft.com/rest/api/azureml/workspaces/get?view=rest-azureml-2024-10-01
        """
        workspace_response = self._http_client.request(
            "GET",
            self._generate_path(*PATH_ML_WORKSPACES, workspace_name),
            params={QUERY_KEY_API_VERSION: self._api_version},
            headers=self._get_headers(),
        )

        self._throw_on_http_error(workspace_response, f"get '{workspace_name}' workspace")
        workspace = Workspace.deserialize(workspace_response)
        return workspace

    def _get_token_manager(self) -> AzureMLTokenManager:
        """Return the token manager, creating it on first use (double-checked locking)."""
        # Lazy init since getting credentials in the constructor can take a long time in some situations
        if self._token_manager is None:
            with self._lock:
                if self._token_manager is None:
                    self._token_manager = AzureMLTokenManager(
                        TokenScope.DEFAULT_AZURE_MANAGEMENT.value, self._logger, credential=self._credential
                    )
                    self._credential = self._token_manager.credential

        return self._token_manager

    @staticmethod
    def _throw_on_http_error(response: HttpResponse, description: str, valid_status: Optional[Set[int]] = None) -> None:
        """Raise an EvaluationException unless the response status is acceptable.

        :param response: The HTTP response to inspect.
        :param description: Short description of the request, used in error text.
        :param valid_status: Optional extra status codes to treat as success.
        :raises EvaluationException: If the status is neither 2xx nor in valid_status.
        """
        if valid_status and (response.status_code in valid_status):
            return
        if 200 <= response.status_code < 300:
            # nothing to see here, move along
            return

        message = f"The {description} request failed with HTTP {response.status_code}"
        try:
            # Surface the service-provided error code/message when available
            error_json = response.json()["error"]
            additional_info = f"({error_json['code']}) {error_json['message']}"
            message += f" - {additional_info}"
        except (JSONDecodeError, ValueError, KeyError):
            pass

        raise EvaluationException(
            message=message,
            target=ErrorTarget.EVALUATE,
            category=ErrorCategory.FAILED_EXECUTION,
            blame=ErrorBlame.SYSTEM_ERROR,
        )

    def _generate_path(self, *paths: str) -> str:
        """Join URL-encoded path segments onto the ARM base URL."""
        sanitized_paths = [quote(path, safe="") for path in paths]
        return self._base_url + "/" + "/".join(sanitized_paths)

    def _get_headers(self) -> Dict[str, str]:
        """Build the authorization and content-type headers for ARM requests."""
        return {"Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json"}