azure-ai-evaluation 1.0.0b3__tar.gz → 1.0.0b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147)
  1. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/CHANGELOG.md +37 -3
  2. azure_ai_evaluation-1.0.0b4/NOTICE.txt +50 -0
  3. {azure_ai_evaluation-1.0.0b3/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.0.0b4}/PKG-INFO +72 -44
  4. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/README.md +30 -34
  5. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/constants.py +4 -2
  6. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_common/math.py +18 -0
  7. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/rai_service.py +54 -62
  8. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_common/utils.py +272 -0
  9. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_constants.py +10 -2
  10. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
  11. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
  12. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
  13. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
  14. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_evaluate.py +116 -62
  15. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
  16. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_utils.py +44 -25
  17. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
  18. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +59 -30
  19. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +10 -13
  20. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +18 -20
  21. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
  22. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
  23. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -4
  24. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -4
  25. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +4 -4
  26. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -4
  27. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_eci/_eci.py +4 -4
  28. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
  29. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
  30. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +3 -2
  31. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +4 -4
  32. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
  33. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +3 -2
  34. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +11 -8
  35. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
  36. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +21 -7
  37. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -5
  38. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_exceptions.py +9 -6
  39. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_http_utils.py +203 -132
  40. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_model_configurations.py +5 -5
  41. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/__init__.py +3 -0
  42. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  43. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  44. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  45. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  46. azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  47. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_version.py +1 -1
  48. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
  49. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
  50. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
  51. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
  52. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
  53. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
  54. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
  55. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
  56. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
  57. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
  58. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
  59. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -5
  60. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
  61. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_simulator.py +112 -113
  62. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_tracing.py +4 -4
  63. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4/azure_ai_evaluation.egg-info}/PKG-INFO +72 -44
  64. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/SOURCES.txt +8 -0
  65. azure_ai_evaluation-1.0.0b4/azure_ai_evaluation.egg-info/requires.txt +9 -0
  66. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/pyproject.toml +1 -2
  67. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/setup.py +3 -5
  68. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +9 -2
  69. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_adv_simulator.py +51 -24
  70. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_builtin_evaluators.py +16 -16
  71. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_evaluate.py +12 -8
  72. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_sim_and_eval.py +2 -3
  73. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_content_safety_rai_script.py +11 -11
  74. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_eval_run.py +5 -2
  75. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluate.py +4 -4
  76. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluate_telemetry.py +10 -9
  77. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_jailbreak_simulator.py +4 -3
  78. azure_ai_evaluation-1.0.0b4/tests/unittests/test_non_adv_simulator.py +359 -0
  79. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_simulator.py +4 -5
  80. azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_common/utils.py +0 -102
  81. azure_ai_evaluation-1.0.0b3/azure_ai_evaluation.egg-info/requires.txt +0 -16
  82. azure_ai_evaluation-1.0.0b3/tests/unittests/test_non_adv_simulator.py +0 -129
  83. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/MANIFEST.in +0 -0
  84. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/__init__.py +0 -0
  85. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/__init__.py +0 -0
  86. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/__init__.py +1 -1
  87. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/__init__.py +0 -0
  88. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  89. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py +0 -0
  90. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  91. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  92. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
  93. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  94. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  95. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  96. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  97. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  98. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  99. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  100. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  101. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  102. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  103. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  104. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -0
  105. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  106. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  107. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  108. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  109. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  110. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  111. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  112. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  113. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  114. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  115. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  116. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  117. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_user_agent.py +0 -0
  118. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/py.typed +0 -0
  119. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/__init__.py +0 -0
  120. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  121. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_constants.py +0 -0
  122. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  123. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  124. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  125. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  126. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  127. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  128. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_utils.py +0 -0
  129. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  130. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  131. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  132. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/setup.cfg +0 -0
  133. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/__init__.py +0 -0
  134. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/__openai_patcher.py +0 -0
  135. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/conftest.py +0 -0
  136. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/__init__.py +0 -0
  137. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/target_fn.py +0 -0
  138. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_metrics_upload.py +0 -0
  139. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_batch_run_context.py +0 -0
  140. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_built_in_evaluator.py +0 -0
  141. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  142. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
  143. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  144. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_save_eval.py +0 -0
  145. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  146. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  147. {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_utils.py +0 -0
@@ -1,5 +1,19 @@
  # Release History
 
+ ## 1.0.0b4 (2024-10-16)
+
+ ### Breaking Changes
+
+ - Removed the `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan`.
+ - `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.
+ - Changed the package extra name from "pf-azure" to "remote".
+
+ ### Bugs Fixed
+ - Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic used to retrieve the RAI service response.
+
+ ### Other Changes
+ - Enhanced the error message to provide clearer instructions when required packages for the remote tracking feature are missing.
+
  ## 1.0.0b3 (2024-10-01)
 
  ### Features Added
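To make the `credential` breaking change in the 1.0.0b4 notes above concrete, here is a minimal sketch (not taken from the package) of constructing a content safety evaluator with an explicit credential. The `ViolenceEvaluator` import and the exact keyword names are assumptions based on the changelog's description and may differ slightly from the released API.

```python
import os

from azure.ai.evaluation import ViolenceEvaluator
from azure.identity import DefaultAzureCredential

azure_ai_project = {
    "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
    "resource_group_name": os.environ["RESOURCE_GROUP"],
    "project_name": os.environ["PROJECT_NAME"],
}

# As of 1.0.0b4 the credential must be supplied explicitly; the SDK no longer
# falls back to DefaultAzureCredential when no credential is passed.
violence_evaluator = ViolenceEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project=azure_ai_project,
)

result = violence_evaluator(query="What is the capital of France?", response="Paris.")
print(result)
```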
@@ -54,9 +68,29 @@ evaluate(
  )
  ```
 
+ - Simulator now requires a model configuration to call the prompty instead of an Azure AI project scope. This enables the usage of the simulator with Entra ID based auth.
+ Before:
+ ```python
+ azure_ai_project = {
+     "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
+     "resource_group_name": os.environ.get("RESOURCE_GROUP"),
+     "project_name": os.environ.get("PROJECT_NAME"),
+ }
+ sim = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+ ```
+ After:
+ ```python
+ model_config = {
+     "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+     "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+ }
+ sim = Simulator(model_config=model_config)
+ ```
+ If `api_key` is not included in the `model_config`, the prompty runtime in `promptflow-core` will pick up `DefaultAzureCredential`.
+
  ### Bugs Fixed
 
- - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
+ - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
 
  ## 1.0.0b2 (2024-09-24)
 
@@ -69,9 +103,9 @@ evaluate(
  ### Breaking Changes
 
  - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
- - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
+ - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
  - The parameter name `project_scope` in content safety evaluators have been renamed to `azure_ai_project` for consistency with evaluate API and simulators.
- - Model configurations classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
+ - Model configurations classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
  - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
 
  ### Features Added
@@ -0,0 +1,50 @@
+ NOTICES AND INFORMATION
+ Do Not Translate or Localize
+
+ This software incorporates material from third parties.
+ Microsoft makes certain open source code available at https://3rdpartysource.microsoft.com,
+ or you may send a check or money order for US $5.00, including the product name,
+ the open source component name, platform, and version number, to:
+
+ Source Code Compliance Team
+ Microsoft Corporation
+ One Microsoft Way
+ Redmond, WA 98052
+ USA
+
+ Notwithstanding any other terms, you may reverse engineer this software to the extent
+ required to debug changes to any libraries licensed under the GNU Lesser General Public License.
+
+ License notice for nltk
+ ---------------------------------------------------------
+
+ Copyright 2024 The NLTK Project
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ License notice for rouge-score
+ ---------------------------------------------------------
+
+ Copyright 2024 The Google Research Authors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: azure-ai-evaluation
- Version: 1.0.0b3
+ Version: 1.0.0b4
  Summary: Microsoft Azure Evaluation Library for Python
  Home-page: https://github.com/Azure/azure-sdk-for-python
  Author: Microsoft Corporation
@@ -21,17 +21,15 @@ Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
+ License-File: NOTICE.txt
  Requires-Dist: promptflow-devkit>=1.15.0
  Requires-Dist: promptflow-core>=1.15.0
- Requires-Dist: numpy>=1.23.2; python_version < "3.12"
- Requires-Dist: numpy>=1.26.4; python_version >= "3.12"
  Requires-Dist: pyjwt>=2.8.0
- Requires-Dist: azure-identity>=1.12.0
+ Requires-Dist: azure-identity>=1.16.0
  Requires-Dist: azure-core>=1.30.2
  Requires-Dist: nltk>=3.9.1
- Requires-Dist: rouge-score>=0.1.2
- Provides-Extra: pf-azure
- Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "pf-azure"
+ Provides-Extra: remote
+ Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "remote"
 
  # Azure AI Evaluation client library for Python
 
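Since the optional `promptflow-azure` dependency now sits behind the `remote` extra (see the `Provides-Extra` change above), callers that use remote tracking can guard the import and point users at the new extra name. This is a hedged illustration, not code from the package; the module name `promptflow.azure` is assumed to be what the `promptflow-azure` distribution provides.

```python
# Hypothetical guard for the optional remote-tracking dependency.
try:
    import promptflow.azure  # noqa: F401  # provided by the "remote" extra
except ImportError as exc:
    raise ImportError(
        "Remote tracking needs promptflow-azure; install it with the new extra name: "
        "pip install azure-ai-evaluation[remote]"
    ) from exc
```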
@@ -154,11 +152,6 @@ name: ApplicationPrompty
  description: Simulates an application
  model:
    api: chat
-   configuration:
-     type: azure_openai
-     azure_deployment: ${env:AZURE_DEPLOYMENT}
-     api_key: ${env:AZURE_OPENAI_API_KEY}
-     azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
    parameters:
      temperature: 0.0
      top_p: 1.0
@@ -187,52 +180,55 @@ import asyncio
  from typing import Any, Dict, List, Optional
  from azure.ai.evaluation.simulator import Simulator
  from promptflow.client import load_flow
- from azure.identity import DefaultAzureCredential
  import os
+ import wikipedia
 
- azure_ai_project = {
-     "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
-     "resource_group_name": os.environ.get("RESOURCE_GROUP"),
-     "project_name": os.environ.get("PROJECT_NAME")
+ # Set up the model configuration without api_key, using DefaultAzureCredential
+ model_config = {
+     "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+     "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+     # not providing key would make the SDK pick up `DefaultAzureCredential`
+     # use "api_key": "<your API key>"
  }
 
- import wikipedia
- wiki_search_term = "Leonardo da vinci"
+ # Use Wikipedia to get some text for the simulation
+ wiki_search_term = "Leonardo da Vinci"
  wiki_title = wikipedia.search(wiki_search_term)[0]
  wiki_page = wikipedia.page(wiki_title)
  text = wiki_page.summary[:1000]
 
- def method_to_invoke_application_prompty(query: str):
+ def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]):
      try:
          current_dir = os.path.dirname(__file__)
          prompty_path = os.path.join(current_dir, "application.prompty")
-         _flow = load_flow(source=prompty_path, model={
-             "configuration": azure_ai_project
-         })
+         _flow = load_flow(
+             source=prompty_path,
+             model=model_config,
+             credential=DefaultAzureCredential()
+         )
          response = _flow(
              query=query,
              context=context,
              conversation_history=messages_list
          )
          return response
-     except:
-         print("Something went wrong invoking the prompty")
+     except Exception as e:
+         print(f"Something went wrong invoking the prompty: {e}")
          return "something went wrong"
 
  async def callback(
-     messages: List[Dict],
+     messages: Dict[str, List[Dict]],
      stream: bool = False,
      session_state: Any = None, # noqa: ANN401
      context: Optional[Dict[str, Any]] = None,
  ) -> dict:
      messages_list = messages["messages"]
-     # get last message
+     # Get the last message from the user
      latest_message = messages_list[-1]
      query = latest_message["content"]
-     context = None
-     # call your endpoint or ai application here
-     response = method_to_invoke_application_prompty(query)
-     # we are formatting the response to follow the openAI chat protocol format
+     # Call your endpoint or AI application here
+     response = method_to_invoke_application_prompty(query, messages_list, context)
+     # Format the response to follow the OpenAI chat protocol format
      formatted_response = {
          "content": response,
          "role": "assistant",
@@ -243,10 +239,8 @@ async def callback(
      messages["messages"].append(formatted_response)
      return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}
 
-
-
  async def main():
-     simulator = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+     simulator = Simulator(model_config=model_config)
      outputs = await simulator(
          target=callback,
          text=text,
@@ -257,17 +251,17 @@ async def main():
              f"I am a teacher and I want to teach my students about {wiki_search_term}"
          ],
      )
-     print(json.dumps(outputs))
+     print(json.dumps(outputs, indent=2))
 
  if __name__ == "__main__":
-     os.environ["AZURE_SUBSCRIPTION_ID"] = ""
-     os.environ["RESOURCE_GROUP"] = ""
-     os.environ["PROJECT_NAME"] = ""
-     os.environ["AZURE_OPENAI_API_KEY"] = ""
-     os.environ["AZURE_OPENAI_ENDPOINT"] = ""
-     os.environ["AZURE_DEPLOYMENT"] = ""
+     # Ensure that the following environment variables are set in your environment:
+     # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT
+     # Example:
+     # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/"
+     # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name"
      asyncio.run(main())
      print("done!")
+
  ```
 
  #### Adversarial Simulator
@@ -426,6 +420,20 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 
  # Release History
 
+ ## 1.0.0b4 (2024-10-16)
+
+ ### Breaking Changes
+
+ - Removed the `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan`.
+ - `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.
+ - Changed the package extra name from "pf-azure" to "remote".
+
+ ### Bugs Fixed
+ - Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic used to retrieve the RAI service response.
+
+ ### Other Changes
+ - Enhanced the error message to provide clearer instructions when required packages for the remote tracking feature are missing.
+
  ## 1.0.0b3 (2024-10-01)
 
  ### Features Added
@@ -480,9 +488,29 @@ evaluate(
  )
  ```
 
+ - Simulator now requires a model configuration to call the prompty instead of an Azure AI project scope. This enables the usage of the simulator with Entra ID based auth.
+ Before:
+ ```python
+ azure_ai_project = {
+     "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
+     "resource_group_name": os.environ.get("RESOURCE_GROUP"),
+     "project_name": os.environ.get("PROJECT_NAME"),
+ }
+ sim = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+ ```
+ After:
+ ```python
+ model_config = {
+     "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+     "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+ }
+ sim = Simulator(model_config=model_config)
+ ```
+ If `api_key` is not included in the `model_config`, the prompty runtime in `promptflow-core` will pick up `DefaultAzureCredential`.
+
  ### Bugs Fixed
 
- - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
+ - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
 
  ## 1.0.0b2 (2024-09-24)
 
@@ -495,9 +523,9 @@ evaluate(
  ### Breaking Changes
 
  - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
- - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
+ - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
  - The parameter name `project_scope` in content safety evaluators have been renamed to `azure_ai_project` for consistency with evaluate API and simulators.
- - Model configurations classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
+ - Model configurations classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
  - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
 
  ### Features Added
@@ -119,11 +119,6 @@ name: ApplicationPrompty
  description: Simulates an application
  model:
    api: chat
-   configuration:
-     type: azure_openai
-     azure_deployment: ${env:AZURE_DEPLOYMENT}
-     api_key: ${env:AZURE_OPENAI_API_KEY}
-     azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
    parameters:
      temperature: 0.0
      top_p: 1.0
@@ -152,52 +147,55 @@ import asyncio
  from typing import Any, Dict, List, Optional
  from azure.ai.evaluation.simulator import Simulator
  from promptflow.client import load_flow
- from azure.identity import DefaultAzureCredential
  import os
+ import wikipedia
 
- azure_ai_project = {
-     "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
-     "resource_group_name": os.environ.get("RESOURCE_GROUP"),
-     "project_name": os.environ.get("PROJECT_NAME")
+ # Set up the model configuration without api_key, using DefaultAzureCredential
+ model_config = {
+     "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+     "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+     # not providing key would make the SDK pick up `DefaultAzureCredential`
+     # use "api_key": "<your API key>"
  }
 
- import wikipedia
- wiki_search_term = "Leonardo da vinci"
+ # Use Wikipedia to get some text for the simulation
+ wiki_search_term = "Leonardo da Vinci"
  wiki_title = wikipedia.search(wiki_search_term)[0]
  wiki_page = wikipedia.page(wiki_title)
  text = wiki_page.summary[:1000]
 
- def method_to_invoke_application_prompty(query: str):
+ def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]):
      try:
          current_dir = os.path.dirname(__file__)
          prompty_path = os.path.join(current_dir, "application.prompty")
-         _flow = load_flow(source=prompty_path, model={
-             "configuration": azure_ai_project
-         })
+         _flow = load_flow(
+             source=prompty_path,
+             model=model_config,
+             credential=DefaultAzureCredential()
+         )
          response = _flow(
              query=query,
              context=context,
              conversation_history=messages_list
          )
          return response
-     except:
-         print("Something went wrong invoking the prompty")
+     except Exception as e:
+         print(f"Something went wrong invoking the prompty: {e}")
          return "something went wrong"
 
  async def callback(
-     messages: List[Dict],
+     messages: Dict[str, List[Dict]],
      stream: bool = False,
      session_state: Any = None, # noqa: ANN401
      context: Optional[Dict[str, Any]] = None,
  ) -> dict:
      messages_list = messages["messages"]
-     # get last message
+     # Get the last message from the user
      latest_message = messages_list[-1]
      query = latest_message["content"]
-     context = None
-     # call your endpoint or ai application here
-     response = method_to_invoke_application_prompty(query)
-     # we are formatting the response to follow the openAI chat protocol format
+     # Call your endpoint or AI application here
+     response = method_to_invoke_application_prompty(query, messages_list, context)
+     # Format the response to follow the OpenAI chat protocol format
      formatted_response = {
          "content": response,
          "role": "assistant",
@@ -208,10 +206,8 @@ async def callback(
      messages["messages"].append(formatted_response)
      return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}
 
-
-
  async def main():
-     simulator = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+     simulator = Simulator(model_config=model_config)
      outputs = await simulator(
          target=callback,
          text=text,
@@ -222,17 +218,17 @@ async def main():
              f"I am a teacher and I want to teach my students about {wiki_search_term}"
          ],
      )
-     print(json.dumps(outputs))
+     print(json.dumps(outputs, indent=2))
 
  if __name__ == "__main__":
-     os.environ["AZURE_SUBSCRIPTION_ID"] = ""
-     os.environ["RESOURCE_GROUP"] = ""
-     os.environ["PROJECT_NAME"] = ""
-     os.environ["AZURE_OPENAI_API_KEY"] = ""
-     os.environ["AZURE_OPENAI_ENDPOINT"] = ""
-     os.environ["AZURE_DEPLOYMENT"] = ""
+     # Ensure that the following environment variables are set in your environment:
+     # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT
+     # Example:
+     # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/"
+     # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name"
      asyncio.run(main())
      print("done!")
+
  ```
 
  #### Adversarial Simulator
@@ -3,6 +3,8 @@
  # ---------------------------------------------------------
  from enum import Enum
 
+ from azure.core import CaseInsensitiveEnumMeta
+
 
  class CommonConstants:
      """Define common constants."""
@@ -43,7 +45,7 @@ class _InternalAnnotationTasks:
      ECI = "eci"
 
 
- class EvaluationMetrics:
+ class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
      """Evaluation metrics to aid the RAI service in determining what
      metrics to request, and how to present them back to the user."""
 
@@ -56,7 +58,7 @@ class EvaluationMetrics:
      XPIA = "xpia"
 
 
- class _InternalEvaluationMetrics:
+ class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
      """Evaluation metrics that are not publicly supported.
      These metrics are experimental and subject to potential change or migration to the main
      enum over time.
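The move to `str`-based enums with `CaseInsensitiveEnumMeta` makes member lookup case-insensitive while keeping plain-string comparisons working. The standalone sketch below mirrors (but does not copy) the enum in the diff above to show the resulting behavior:

```python
from enum import Enum

from azure.core import CaseInsensitiveEnumMeta


class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
    """Trimmed stand-in for the enum shown in the diff above."""

    VIOLENCE = "violence"
    XPIA = "xpia"


# Member lookup by name is case-insensitive thanks to the metaclass...
assert EvaluationMetrics["violence"] is EvaluationMetrics.VIOLENCE
# ...and members compare equal to plain strings because the enum derives from str.
assert EvaluationMetrics.VIOLENCE == "violence"
print(EvaluationMetrics.XPIA.value)  # -> xpia
```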
@@ -0,0 +1,18 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ import math
+ from typing import List
+
+
+ def list_sum(lst: List[float]) -> float:
+     return sum(lst)
+
+
+ def list_mean(lst: List[float]) -> float:
+     return list_sum(lst) / len(lst)
+
+
+ def list_mean_nan_safe(lst: List[float]) -> float:
+     return list_mean([l for l in lst if not math.isnan(l)])
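These helpers replace the earlier `numpy`-based aggregation: `list_mean_nan_safe` drops `math.nan` entries before averaging, whereas a plain mean over the same list would be poisoned by NaN. A quick usage sketch (importing the private module purely for illustration):

```python
import math

from azure.ai.evaluation._common.math import list_mean, list_mean_nan_safe

scores = [4.0, math.nan, 5.0]

print(list_mean_nan_safe(scores))     # 4.5 -- NaN entries are filtered out first
print(math.isnan(list_mean(scores)))  # True -- the unfiltered mean is NaN
```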