azure-ai-evaluation 1.0.0b1__tar.gz → 1.0.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. azure_ai_evaluation-1.0.0b3/CHANGELOG.md +81 -0
  2. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/PKG-INFO +144 -14
  3. azure_ai_evaluation-1.0.0b1/azure_ai_evaluation.egg-info/PKG-INFO → azure_ai_evaluation-1.0.0b3/README.md +74 -62
  4. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/__init__.py +4 -4
  5. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_common/rai_service.py +4 -4
  6. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_common/utils.py +40 -25
  7. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_constants.py +13 -0
  8. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
  9. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
  10. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
  11. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
  12. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_evaluate.py +88 -63
  13. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
  14. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_utils.py +29 -22
  15. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +70 -0
  16. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
  17. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  18. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
  19. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
  20. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
  21. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  22. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
  23. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
  24. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +55 -0
  25. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +55 -0
  26. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +55 -0
  27. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +55 -0
  28. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_eci/_eci.py +62 -0
  29. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
  30. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +72 -0
  31. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
  32. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +71 -0
  33. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
  34. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
  35. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +57 -0
  36. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
  37. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +77 -0
  38. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
  39. {azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_chat → azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_retrieval}/__init__.py +2 -2
  40. {azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_chat/retrieval → azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_retrieval}/_retrieval.py +17 -29
  41. {azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_chat/retrieval → azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_retrieval}/retrieval.prompty +0 -5
  42. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
  43. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -18
  44. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  45. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_evaluators/_xpia/xpia.py +65 -0
  46. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_exceptions.py +0 -1
  47. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_model_configurations.py +55 -0
  48. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_version.py +1 -1
  49. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/__init__.py +1 -1
  50. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
  51. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  52. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
  53. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
  54. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
  55. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
  56. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
  57. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
  58. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
  59. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
  60. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  61. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
  62. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
  63. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/simulator/simulator.py → azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/simulator/_simulator.py +166 -88
  64. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_tracing.py +21 -24
  65. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_utils.py +4 -1
  66. azure_ai_evaluation-1.0.0b1/README.md → azure_ai_evaluation-1.0.0b3/azure_ai_evaluation.egg-info/PKG-INFO +197 -13
  67. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure_ai_evaluation.egg-info/SOURCES.txt +12 -11
  68. azure_ai_evaluation-1.0.0b3/pyproject.toml +21 -0
  69. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/setup.py +1 -0
  70. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/conftest.py +22 -2
  71. azure_ai_evaluation-1.0.0b3/tests/e2etests/__init__.py +0 -0
  72. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/e2etests/test_builtin_evaluators.py +146 -186
  73. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/e2etests/test_evaluate.py +18 -11
  74. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/e2etests/test_metrics_upload.py +9 -3
  75. azure_ai_evaluation-1.0.0b3/tests/e2etests/test_sim_and_eval.py +134 -0
  76. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_built_in_evaluator.py +4 -9
  77. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_evaluate.py +104 -8
  78. azure_ai_evaluation-1.0.0b3/tests/unittests/test_evaluators/test_inputs_evaluators.py +46 -0
  79. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_non_adv_simulator.py +13 -15
  80. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_simulator.py +2 -2
  81. azure_ai_evaluation-1.0.0b1/CHANGELOG.md +0 -17
  82. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -350
  83. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
  84. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -122
  85. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -66
  86. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -78
  87. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -76
  88. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -76
  89. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -76
  90. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -99
  91. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -122
  92. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -123
  93. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -104
  94. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  95. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  96. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -131
  97. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -140
  98. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_model_configurations.py +0 -27
  99. azure_ai_evaluation-1.0.0b1/pyproject.toml +0 -6
  100. azure_ai_evaluation-1.0.0b1/tests/unittests/test_chat_evaluator.py +0 -109
  101. azure_ai_evaluation-1.0.0b1/tests/unittests/test_content_safety_chat_evaluator.py +0 -82
  102. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/MANIFEST.in +0 -0
  103. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/__init__.py +0 -0
  104. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/__init__.py +0 -0
  105. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_common/__init__.py +0 -0
  106. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_common/constants.py +0 -0
  107. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  108. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py +0 -0
  109. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  110. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  111. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  112. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  113. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  114. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  115. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  116. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  117. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  118. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  119. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  120. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  121. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  122. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  123. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  124. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  125. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  126. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_http_utils.py +3 -3
  127. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/_user_agent.py +0 -0
  128. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/py.typed +0 -0
  129. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  130. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_constants.py +0 -0
  131. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  132. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  133. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  134. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  135. {azure_ai_evaluation-1.0.0b1/tests → azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/simulator/_prompty}/__init__.py +0 -0
  136. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  137. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  138. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  139. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure_ai_evaluation.egg-info/requires.txt +0 -0
  140. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  141. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/setup.cfg +0 -0
  142. {azure_ai_evaluation-1.0.0b1/tests/e2etests → azure_ai_evaluation-1.0.0b3/tests}/__init__.py +0 -0
  143. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/__openai_patcher.py +0 -0
  144. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  145. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/e2etests/target_fn.py +0 -0
  146. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/e2etests/test_adv_simulator.py +0 -0
  147. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_batch_run_context.py +0 -0
  148. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  149. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_content_safety_rai_script.py +0 -0
  150. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_eval_run.py +0 -0
  151. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_evaluate_telemetry.py +0 -0
  152. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
  153. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_jailbreak_simulator.py +0 -0
  154. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_save_eval.py +0 -0
  155. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  156. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  157. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b3}/tests/unittests/test_utils.py +0 -0
@@ -0,0 +1,81 @@
1
+ # Release History
2
+
3
+ ## 1.0.0b3 (2024-10-01)
4
+
5
+ ### Features Added
6
+
7
+ - Added `type` field to `AzureOpenAIModelConfiguration` and `OpenAIModelConfiguration`
8
+ - The following evaluators now support `conversation` as an alternative input to their usual single-turn inputs:
9
+ - `ViolenceEvaluator`
10
+ - `SexualEvaluator`
11
+ - `SelfHarmEvaluator`
12
+ - `HateUnfairnessEvaluator`
13
+ - `ProtectedMaterialEvaluator`
14
+ - `IndirectAttackEvaluator`
15
+ - `CoherenceEvaluator`
16
+ - `RelevanceEvaluator`
17
+ - `FluencyEvaluator`
18
+ - `GroundednessEvaluator`
19
+ - Surfaced `RetrievalScoreEvaluator`, formerly an internal part of `ChatEvaluator`, as a standalone conversation-only evaluator.
20
+
21
+ ### Breaking Changes
22
+
23
+ - Removed `ContentSafetyChatEvaluator` and `ChatEvaluator`
24
+ - The `evaluator_config` parameter of `evaluate` now maps an evaluator name to a dictionary `EvaluatorConfig`, which is a `TypedDict`. The
25
+ `column_mapping` between `data` or `target` and evaluator field names should now be specified inside this new dictionary:
26
+
27
+ Before:
28
+ ```python
29
+ evaluate(
30
+ ...,
31
+ evaluator_config={
32
+ "hate_unfairness": {
33
+ "query": "${data.question}",
34
+ "response": "${data.answer}",
35
+ }
36
+ },
37
+ ...
38
+ )
39
+ ```
40
+
41
+ After:
42
+ ```python
43
+ evaluate(
44
+ ...,
45
+ evaluator_config={
46
+ "hate_unfairness": {
47
+ "column_mapping": {
48
+ "query": "${data.question}",
49
+ "response": "${data.answer}",
50
+ }
51
+ }
52
+ },
53
+ ...
54
+ )
55
+ ```
56
+
57
+ ### Bugs Fixed
58
+
59
+ - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
60
+
61
+ ## 1.0.0b2 (2024-09-24)
62
+
63
+ ### Breaking Changes
64
+
65
+ - `data` and `evaluators` are now required keywords in `evaluate`.
66
+
67
+ ## 1.0.0b1 (2024-09-20)
68
+
69
+ ### Breaking Changes
70
+
71
+ - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
72
+ - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
73
+ - The parameter name `project_scope` in content safety evaluators have been renamed to `azure_ai_project` for consistency with evaluate API and simulators.
74
+ - Model configurations classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
75
+ - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
76
+
77
+ ### Features Added
78
+
79
+ - First preview
80
+ - This package is port of `promptflow-evals`. New features will be added only to this package moving forward.
81
+ - Added a `TypedDict` for `AzureAIProject` that allows for better intellisense and type checking when passing in project information
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.0.0b1
3
+ Version: 1.0.0b3
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -35,11 +35,27 @@ Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "pf-azure"
35
35
 
36
36
  # Azure AI Evaluation client library for Python
37
37
 
38
+ We are excited to introduce the public preview of the Azure AI Evaluation SDK.
39
+
40
+ [Source code][source_code]
41
+ | [Package (PyPI)][evaluation_pypi]
42
+ | [API reference documentation][evaluation_ref_docs]
43
+ | [Product documentation][product_documentation]
44
+ | [Samples][evaluation_samples]
45
+
46
+ This package has been tested with Python 3.8, 3.9, 3.10, 3.11, and 3.12.
47
+
48
+ For a more complete set of Azure libraries, see https://aka.ms/azsdk/python/all
49
+
38
50
  ## Getting started
39
51
 
52
+ ### Prerequisites
53
+
54
+ - Python 3.8 or later is required to use this package.
55
+
40
56
  ### Install the package
41
57
 
42
- Install the Azure AI Evaluation library for Python with:
58
+ Install the Azure AI Evaluation library for Python with [pip][pip_link]:
43
59
 
44
60
  ```bash
45
61
  pip install azure-ai-evaluation
@@ -51,6 +67,8 @@ Evaluators are custom or prebuilt classes or functions that are designed to meas
51
67
 
52
68
  ## Examples
53
69
 
70
+ ### Evaluators
71
+
54
72
  Users can create evaluator runs on the local machine as shown in the example below:
55
73
 
56
74
  ```python
@@ -92,9 +110,9 @@ if __name__ == "__main__":
92
110
 
93
111
  # Initialize Project Scope
94
112
  azure_ai_project = {
95
- "subscription_id": "e0fd569c-e34a-4249-8c24-e8d723c7f054",
96
- "resource_group_name": "rg-test",
97
- "project_name": "project-test",
113
+ "subscription_id": <subscription_id>,
114
+ "resource_group_name": <resource_group_name>,
115
+ "project_name": <project_name>
98
116
  }
99
117
 
100
118
  violence_eval = ViolenceEvaluator(azure_ai_project)
@@ -122,9 +140,13 @@ if __name__ == "__main__":
122
140
 
123
141
  pprint(result)
124
142
  ```
125
- ## Simulator
143
+ ### Simulator
144
+
145
+
146
+ Simulators allow users to generate synthetic data using their application. Simulator expects the user to have a callback method that invokes
147
+ their AI application.
126
148
 
127
- Sample application prompty
149
+ #### Simulating with a Prompty
128
150
 
129
151
  ```yaml
130
152
  ---
@@ -163,7 +185,7 @@ Application code:
163
185
  import json
164
186
  import asyncio
165
187
  from typing import Any, Dict, List, Optional
166
- from azure.ai.evaluation.synthetic import Simulator
188
+ from azure.ai.evaluation.simulator import Simulator
167
189
  from promptflow.client import load_flow
168
190
  from azure.identity import DefaultAzureCredential
169
191
  import os
@@ -171,8 +193,7 @@ import os
171
193
  azure_ai_project = {
172
194
  "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
173
195
  "resource_group_name": os.environ.get("RESOURCE_GROUP"),
174
- "project_name": os.environ.get("PROJECT_NAME"),
175
- "credential": DefaultAzureCredential(),
196
+ "project_name": os.environ.get("PROJECT_NAME")
176
197
  }
177
198
 
178
199
  import wikipedia
@@ -249,8 +270,7 @@ if __name__ == "__main__":
249
270
  print("done!")
250
271
  ```
251
272
 
252
- Simulators allow users to generate synthetic data using their application. Simulator expects the user to have a callback method that invokes
253
- their AI application. Here's a sample of a callback which invokes AsyncAzureOpenAI:
273
+ #### Adversarial Simulator
254
274
 
255
275
  ```python
256
276
from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
@@ -318,7 +338,9 @@ async def callback(
318
338
  }
319
339
 
320
340
  ```
321
- ### Adversarial QA:
341
+
342
+ #### Adversarial QA
343
+
322
344
  ```python
323
345
  scenario = AdversarialScenario.ADVERSARIAL_QA
324
346
  simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
@@ -334,7 +356,7 @@ outputs = asyncio.run(
334
356
 
335
357
  print(outputs.to_eval_qa_json_lines())
336
358
  ```
337
- ### Direct Attack Simulator
359
+ #### Direct Attack Simulator
338
360
 
339
361
  ```python
340
362
  scenario = AdversarialScenario.ADVERSARIAL_QA
@@ -353,13 +375,121 @@ print(outputs)
353
375
  ```
354
376
  ## Troubleshooting
355
377
 
378
+ ### General
379
+
380
+ Azure ML clients raise exceptions defined in [Azure Core][azure_core_readme].
381
+
382
+ ### Logging
383
+
384
+ This library uses the standard
385
+ [logging][python_logging] library for logging.
386
+ Basic information about HTTP sessions (URLs, headers, etc.) is logged at INFO
387
+ level.
388
+
389
+ Detailed DEBUG level logging, including request/response bodies and unredacted
390
+ headers, can be enabled on a client with the `logging_enable` argument.
391
+
392
+ See full SDK logging documentation with examples [here][sdk_logging_docs].
393
+
356
394
  ## Next steps
357
395
 
396
+ - View our [samples][evaluation_samples].
397
+ - View our [documentation][product_documentation]
398
+
358
399
  ## Contributing
359
400
 
401
+ This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [cla.microsoft.com][cla].
402
+
403
+ When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
404
+
405
+ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information see the [Code of Conduct FAQ][coc_faq] or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments.
406
+
407
+ <!-- LINKS -->
408
+
409
+ [source_code]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/evaluation/azure-ai-evaluation
410
+ [evaluation_pypi]: https://pypi.org/project/azure-ai-evaluation/
411
+ [evaluation_ref_docs]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
412
+ [evaluation_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios
413
+ [product_documentation]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk
414
+ [python_logging]: https://docs.python.org/3/library/logging.html
415
+ [sdk_logging_docs]: https://docs.microsoft.com/azure/developer/python/azure-sdk-logging
416
+ [azure_core_readme]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
417
+ [pip_link]: https://pypi.org/project/pip/
418
+ [azure_core_ref_docs]: https://aka.ms/azsdk-python-core-policies
419
+ [azure_core]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
420
+ [azure_identity]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/identity/azure-identity
421
+ [cla]: https://cla.microsoft.com
422
+ [code_of_conduct]: https://opensource.microsoft.com/codeofconduct/
423
+ [coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/
424
+ [coc_contact]: mailto:opencode@microsoft.com
425
+
360
426
 
361
427
  # Release History
362
428
 
429
+ ## 1.0.0b3 (2024-10-01)
430
+
431
+ ### Features Added
432
+
433
+ - Added `type` field to `AzureOpenAIModelConfiguration` and `OpenAIModelConfiguration`
434
+ - The following evaluators now support `conversation` as an alternative input to their usual single-turn inputs:
435
+ - `ViolenceEvaluator`
436
+ - `SexualEvaluator`
437
+ - `SelfHarmEvaluator`
438
+ - `HateUnfairnessEvaluator`
439
+ - `ProtectedMaterialEvaluator`
440
+ - `IndirectAttackEvaluator`
441
+ - `CoherenceEvaluator`
442
+ - `RelevanceEvaluator`
443
+ - `FluencyEvaluator`
444
+ - `GroundednessEvaluator`
445
+ - Surfaced `RetrievalScoreEvaluator`, formerly an internal part of `ChatEvaluator`, as a standalone conversation-only evaluator.
446
+
447
+ ### Breaking Changes
448
+
449
+ - Removed `ContentSafetyChatEvaluator` and `ChatEvaluator`
450
+ - The `evaluator_config` parameter of `evaluate` now maps an evaluator name to a dictionary `EvaluatorConfig`, which is a `TypedDict`. The
451
+ `column_mapping` between `data` or `target` and evaluator field names should now be specified inside this new dictionary:
452
+
453
+ Before:
454
+ ```python
455
+ evaluate(
456
+ ...,
457
+ evaluator_config={
458
+ "hate_unfairness": {
459
+ "query": "${data.question}",
460
+ "response": "${data.answer}",
461
+ }
462
+ },
463
+ ...
464
+ )
465
+ ```
466
+
467
+ After:
468
+ ```python
469
+ evaluate(
470
+ ...,
471
+ evaluator_config={
472
+ "hate_unfairness": {
473
+ "column_mapping": {
474
+ "query": "${data.question}",
475
+ "response": "${data.answer}",
476
+ }
477
+ }
478
+ },
479
+ ...
480
+ )
481
+ ```
482
+
483
+ ### Bugs Fixed
484
+
485
+ - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
486
+
487
+ ## 1.0.0b2 (2024-09-24)
488
+
489
+ ### Breaking Changes
490
+
491
+ - `data` and `evaluators` are now required keywords in `evaluate`.
492
+
363
493
  ## 1.0.0b1 (2024-09-20)
364
494
 
365
495
  ### Breaking Changes
@@ -1,45 +1,26 @@
1
- Metadata-Version: 2.1
2
- Name: azure-ai-evaluation
3
- Version: 1.0.0b1
4
- Summary: Microsoft Azure Evaluation Library for Python
5
- Home-page: https://github.com/Azure/azure-sdk-for-python
6
- Author: Microsoft Corporation
7
- Author-email: azuresdkengsysadmins@microsoft.com
8
- License: MIT License
9
- Project-URL: Bug Reports, https://github.com/Azure/azure-sdk-for-python/issues
10
- Project-URL: Source, https://github.com/Azure/azure-sdk-for-python
11
- Keywords: azure,azure sdk
12
- Classifier: Development Status :: 4 - Beta
13
- Classifier: Programming Language :: Python
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3 :: Only
16
- Classifier: Programming Language :: Python :: 3.8
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: License :: OSI Approved :: MIT License
21
- Classifier: Operating System :: OS Independent
22
- Requires-Python: >=3.8
23
- Description-Content-Type: text/markdown
24
- Requires-Dist: promptflow-devkit>=1.15.0
25
- Requires-Dist: promptflow-core>=1.15.0
26
- Requires-Dist: numpy>=1.23.2; python_version < "3.12"
27
- Requires-Dist: numpy>=1.26.4; python_version >= "3.12"
28
- Requires-Dist: pyjwt>=2.8.0
29
- Requires-Dist: azure-identity>=1.12.0
30
- Requires-Dist: azure-core>=1.30.2
31
- Requires-Dist: nltk>=3.9.1
32
- Requires-Dist: rouge-score>=0.1.2
33
- Provides-Extra: pf-azure
34
- Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "pf-azure"
35
-
36
1
  # Azure AI Evaluation client library for Python
37
2
 
3
+ We are excited to introduce the public preview of the Azure AI Evaluation SDK.
4
+
5
+ [Source code][source_code]
6
+ | [Package (PyPI)][evaluation_pypi]
7
+ | [API reference documentation][evaluation_ref_docs]
8
+ | [Product documentation][product_documentation]
9
+ | [Samples][evaluation_samples]
10
+
11
+ This package has been tested with Python 3.8, 3.9, 3.10, 3.11, and 3.12.
12
+
13
+ For a more complete set of Azure libraries, see https://aka.ms/azsdk/python/all
14
+
38
15
  ## Getting started
39
16
 
17
+ ### Prerequisites
18
+
19
+ - Python 3.8 or later is required to use this package.
20
+
40
21
  ### Install the package
41
22
 
42
- Install the Azure AI Evaluation library for Python with:
23
+ Install the Azure AI Evaluation library for Python with [pip][pip_link]:
43
24
 
44
25
  ```bash
45
26
  pip install azure-ai-evaluation
@@ -51,6 +32,8 @@ Evaluators are custom or prebuilt classes or functions that are designed to meas
51
32
 
52
33
  ## Examples
53
34
 
35
+ ### Evaluators
36
+
54
37
  Users can create evaluator runs on the local machine as shown in the example below:
55
38
 
56
39
  ```python
@@ -92,9 +75,9 @@ if __name__ == "__main__":
92
75
 
93
76
  # Initialize Project Scope
94
77
  azure_ai_project = {
95
- "subscription_id": "e0fd569c-e34a-4249-8c24-e8d723c7f054",
96
- "resource_group_name": "rg-test",
97
- "project_name": "project-test",
78
+ "subscription_id": <subscription_id>,
79
+ "resource_group_name": <resource_group_name>,
80
+ "project_name": <project_name>
98
81
  }
99
82
 
100
83
  violence_eval = ViolenceEvaluator(azure_ai_project)
@@ -122,9 +105,13 @@ if __name__ == "__main__":
122
105
 
123
106
  pprint(result)
124
107
  ```
125
- ## Simulator
108
+ ### Simulator
126
109
 
127
- Sample application prompty
110
+
111
+ Simulators allow users to generate synthetic data using their application. Simulator expects the user to have a callback method that invokes
112
+ their AI application.
113
+
114
+ #### Simulating with a Prompty
128
115
 
129
116
  ```yaml
130
117
  ---
@@ -163,7 +150,7 @@ Application code:
163
150
  import json
164
151
  import asyncio
165
152
  from typing import Any, Dict, List, Optional
166
- from azure.ai.evaluation.synthetic import Simulator
153
+ from azure.ai.evaluation.simulator import Simulator
167
154
  from promptflow.client import load_flow
168
155
  from azure.identity import DefaultAzureCredential
169
156
  import os
@@ -171,8 +158,7 @@ import os
171
158
  azure_ai_project = {
172
159
  "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
173
160
  "resource_group_name": os.environ.get("RESOURCE_GROUP"),
174
- "project_name": os.environ.get("PROJECT_NAME"),
175
- "credential": DefaultAzureCredential(),
161
+ "project_name": os.environ.get("PROJECT_NAME")
176
162
  }
177
163
 
178
164
  import wikipedia
@@ -249,8 +235,7 @@ if __name__ == "__main__":
249
235
  print("done!")
250
236
  ```
251
237
 
252
- Simulators allow users to generate synthentic data using their application. Simulator expects the user to have a callback method that invokes
253
- their AI application. Here's a sample of a callback which invokes AsyncAzureOpenAI:
238
+ #### Adversarial Simulator
254
239
 
255
240
  ```python
256
241
from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
@@ -318,7 +303,9 @@ async def callback(
318
303
  }
319
304
 
320
305
  ```
321
- ### Adversarial QA:
306
+
307
+ #### Adversarial QA
308
+
322
309
  ```python
323
310
  scenario = AdversarialScenario.ADVERSARIAL_QA
324
311
  simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
@@ -334,7 +321,7 @@ outputs = asyncio.run(
334
321
 
335
322
  print(outputs.to_eval_qa_json_lines())
336
323
  ```
337
- ### Direct Attack Simulator
324
+ #### Direct Attack Simulator
338
325
 
339
326
  ```python
340
327
  scenario = AdversarialScenario.ADVERSARIAL_QA
@@ -353,25 +340,50 @@ print(outputs)
353
340
  ```
354
341
  ## Troubleshooting
355
342
 
356
- ## Next steps
343
+ ### General
357
344
 
358
- ## Contributing
345
+ Azure ML clients raise exceptions defined in [Azure Core][azure_core_readme].
346
+
347
+ ### Logging
359
348
 
349
+ This library uses the standard
350
+ [logging][python_logging] library for logging.
351
+ Basic information about HTTP sessions (URLs, headers, etc.) is logged at INFO
352
+ level.
360
353
 
361
- # Release History
354
+ Detailed DEBUG level logging, including request/response bodies and unredacted
355
+ headers, can be enabled on a client with the `logging_enable` argument.
362
356
 
363
- ## 1.0.0b1 (2024-09-20)
357
+ See full SDK logging documentation with examples [here][sdk_logging_docs].
364
358
 
365
- ### Breaking Changes
359
+ ## Next steps
366
360
 
367
- - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
368
- - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
369
- - The parameter name `project_scope` in content safety evaluators have been renamed to `azure_ai_project` for consistency with evaluate API and simulators.
370
- - Model configurations classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
371
- - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
361
+ - View our [samples][evaluation_samples].
362
+ - View our [documentation][product_documentation]
372
363
 
373
- ### Features Added
364
+ ## Contributing
374
365
 
375
- - First preview
376
- - This package is port of `promptflow-evals`. New features will be added only to this package moving forward.
377
- - Added a `TypedDict` for `AzureAIProject` that allows for better intellisense and type checking when passing in project information
366
+ This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [cla.microsoft.com][cla].
367
+
368
+ When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
369
+
370
+ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information see the [Code of Conduct FAQ][coc_faq] or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments.
371
+
372
+ <!-- LINKS -->
373
+
374
+ [source_code]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/evaluation/azure-ai-evaluation
375
+ [evaluation_pypi]: https://pypi.org/project/azure-ai-evaluation/
376
+ [evaluation_ref_docs]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
377
+ [evaluation_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios
378
+ [product_documentation]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk
379
+ [python_logging]: https://docs.python.org/3/library/logging.html
380
+ [sdk_logging_docs]: https://docs.microsoft.com/azure/developer/python/azure-sdk-logging
381
+ [azure_core_readme]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
382
+ [pip_link]: https://pypi.org/project/pip/
383
+ [azure_core_ref_docs]: https://aka.ms/azsdk-python-core-policies
384
+ [azure_core]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
385
+ [azure_identity]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/identity/azure-identity
386
+ [cla]: https://cla.microsoft.com
387
+ [code_of_conduct]: https://opensource.microsoft.com/codeofconduct/
388
+ [coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/
389
+ [coc_contact]: mailto:opencode@microsoft.com
@@ -4,10 +4,8 @@
4
4
 
5
5
  from ._evaluate._evaluate import evaluate
6
6
  from ._evaluators._bleu import BleuScoreEvaluator
7
- from ._evaluators._chat import ChatEvaluator
8
7
  from ._evaluators._coherence import CoherenceEvaluator
9
8
  from ._evaluators._content_safety import (
10
- ContentSafetyChatEvaluator,
11
9
  ContentSafetyEvaluator,
12
10
  HateUnfairnessEvaluator,
13
11
  SelfHarmEvaluator,
@@ -22,6 +20,7 @@ from ._evaluators._meteor import MeteorScoreEvaluator
22
20
  from ._evaluators._protected_material import ProtectedMaterialEvaluator
23
21
  from ._evaluators._qa import QAEvaluator
24
22
  from ._evaluators._relevance import RelevanceEvaluator
23
+ from ._evaluators._retrieval import RetrievalEvaluator
25
24
  from ._evaluators._rouge import RougeScoreEvaluator, RougeType
26
25
  from ._evaluators._similarity import SimilarityEvaluator
27
26
  from ._evaluators._xpia import IndirectAttackEvaluator
@@ -29,6 +28,7 @@ from ._model_configurations import (
29
28
  AzureAIProject,
30
29
  AzureOpenAIModelConfiguration,
31
30
  OpenAIModelConfiguration,
31
+ EvaluatorConfig,
32
32
  )
33
33
 
34
34
  __all__ = [
@@ -40,21 +40,21 @@ __all__ = [
40
40
  "RelevanceEvaluator",
41
41
  "SimilarityEvaluator",
42
42
  "QAEvaluator",
43
- "ChatEvaluator",
44
43
  "ViolenceEvaluator",
45
44
  "SexualEvaluator",
46
45
  "SelfHarmEvaluator",
47
46
  "HateUnfairnessEvaluator",
48
47
  "ContentSafetyEvaluator",
49
- "ContentSafetyChatEvaluator",
50
48
  "IndirectAttackEvaluator",
51
49
  "BleuScoreEvaluator",
52
50
  "GleuScoreEvaluator",
53
51
  "MeteorScoreEvaluator",
52
+ "RetrievalEvaluator",
54
53
  "RougeScoreEvaluator",
55
54
  "RougeType",
56
55
  "ProtectedMaterialEvaluator",
57
56
  "AzureAIProject",
58
57
  "AzureOpenAIModelConfiguration",
59
58
  "OpenAIModelConfiguration",
59
+ "EvaluatorConfig",
60
60
  ]
@@ -11,12 +11,12 @@ from urllib.parse import urlparse
11
11
 
12
12
  import jwt
13
13
  import numpy as np
14
- from azure.core.credentials import TokenCredential
15
- from azure.identity import DefaultAzureCredential
16
14
 
15
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
16
  from azure.ai.evaluation._http_utils import get_async_http_client
18
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
19
17
  from azure.ai.evaluation._model_configurations import AzureAIProject
18
+ from azure.core.credentials import TokenCredential
19
+ from azure.identity import DefaultAzureCredential
20
20
 
21
21
  from .constants import (
22
22
  CommonConstants,
@@ -348,7 +348,7 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
348
348
  )
349
349
 
350
350
  if response.status_code != 200:
351
- msg = f"Failed to retrieve the discovery service URL."
351
+ msg = "Failed to retrieve the discovery service URL."
352
352
  raise EvaluationException(
353
353
  message=msg,
354
354
  internal_message=msg,