azure-ai-evaluation 1.0.0b1__tar.gz → 1.0.0b2__tar.gz

This diff shows the changes between publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

Note: this version of azure-ai-evaluation was flagged as potentially problematic.

Files changed (139):
  1. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/CHANGELOG.md +6 -0
  2. {azure_ai_evaluation-1.0.0b1/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.0.0b2}/PKG-INFO +86 -14
  3. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/README.md +79 -13
  4. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/__init__.py +1 -5
  5. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_common/rai_service.py +4 -4
  6. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_common/utils.py +19 -19
  7. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_constants.py +9 -0
  8. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
  9. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
  10. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
  11. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
  12. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_evaluate.py +35 -28
  13. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
  14. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_utils.py +29 -22
  15. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_chat/_chat.py +16 -9
  16. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +4 -10
  17. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -10
  18. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
  19. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +1 -2
  20. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
  21. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +1 -1
  22. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +1 -1
  23. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +1 -1
  24. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +1 -1
  25. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_eci/_eci.py +2 -2
  26. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
  27. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +5 -10
  28. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +5 -10
  29. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
  30. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +2 -2
  31. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +2 -2
  32. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
  33. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +5 -10
  34. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
  35. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -10
  36. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +1 -2
  37. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_version.py +1 -1
  38. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/__init__.py +1 -1
  39. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
  40. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  41. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
  42. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
  43. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
  44. azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
  45. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
  46. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
  47. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
  48. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
  49. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  50. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
  51. azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/simulator/simulator.py → azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/simulator/_simulator.py +147 -80
  52. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_tracing.py +21 -24
  53. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_utils.py +4 -1
  54. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2/azure_ai_evaluation.egg-info}/PKG-INFO +86 -14
  55. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure_ai_evaluation.egg-info/SOURCES.txt +4 -1
  56. azure_ai_evaluation-1.0.0b2/pyproject.toml +21 -0
  57. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/setup.py +1 -0
  58. azure_ai_evaluation-1.0.0b2/tests/e2etests/__init__.py +0 -0
  59. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/e2etests/test_metrics_upload.py +9 -3
  60. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_evaluate.py +102 -6
  61. azure_ai_evaluation-1.0.0b2/tests/unittests/test_evaluators/test_inputs_evaluators.py +46 -0
  62. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_non_adv_simulator.py +11 -12
  63. azure_ai_evaluation-1.0.0b1/pyproject.toml +0 -6
  64. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/MANIFEST.in +0 -0
  65. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/__init__.py +0 -0
  66. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/__init__.py +0 -0
  67. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_common/__init__.py +0 -0
  68. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_common/constants.py +0 -0
  69. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
  70. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py +0 -0
  71. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
  72. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  73. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  74. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_chat/__init__.py +0 -0
  75. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -0
  76. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -0
  77. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  78. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  79. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  80. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  81. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  82. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  83. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  84. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  85. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  86. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  87. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -0
  88. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  89. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  90. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -0
  91. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  92. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  93. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  94. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  95. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  96. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  97. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  98. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_exceptions.py +0 -0
  99. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_http_utils.py +3 -3
  100. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_model_configurations.py +0 -0
  101. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/_user_agent.py +0 -0
  102. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/py.typed +0 -0
  103. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  104. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_constants.py +0 -0
  105. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  106. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  107. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  108. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  109. {azure_ai_evaluation-1.0.0b1/tests → azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/simulator/_prompty}/__init__.py +0 -0
  110. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  111. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  112. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  113. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  114. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure_ai_evaluation.egg-info/requires.txt +0 -0
  115. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  116. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/setup.cfg +0 -0
  117. {azure_ai_evaluation-1.0.0b1/tests/e2etests → azure_ai_evaluation-1.0.0b2/tests}/__init__.py +0 -0
  118. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/__openai_patcher.py +0 -0
  119. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/conftest.py +0 -0
  120. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  121. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/e2etests/target_fn.py +0 -0
  122. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/e2etests/test_adv_simulator.py +0 -0
  123. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/e2etests/test_builtin_evaluators.py +0 -0
  124. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/e2etests/test_evaluate.py +0 -0
  125. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_batch_run_context.py +0 -0
  126. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_built_in_evaluator.py +0 -0
  127. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_chat_evaluator.py +0 -0
  128. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_content_safety_chat_evaluator.py +0 -0
  129. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  130. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_content_safety_rai_script.py +0 -0
  131. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_eval_run.py +0 -0
  132. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_evaluate_telemetry.py +0 -0
  133. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
  134. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_jailbreak_simulator.py +0 -0
  135. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_save_eval.py +0 -0
  136. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_simulator.py +0 -0
  137. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  138. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  139. {azure_ai_evaluation-1.0.0b1 → azure_ai_evaluation-1.0.0b2}/tests/unittests/test_utils.py +0 -0
--- azure_ai_evaluation-1.0.0b1/CHANGELOG.md
+++ azure_ai_evaluation-1.0.0b2/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Release History
 
+## 1.0.0b2 (2024-09-24)
+
+### Breaking Changes
+
+- `data` and `evaluators` are now required keywords in `evaluate`.
+
 ## 1.0.0b1 (2024-09-20)
 
 ### Breaking Changes
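The breaking change above means both arguments must now be passed by name. A minimal sketch of the new calling convention, assuming a local `data.jsonl` file and the built-in `F1ScoreEvaluator` (the `"f1_score"` key is an arbitrary label):

```python
from azure.ai.evaluation import F1ScoreEvaluator, evaluate

# As of 1.0.0b2, `data` and `evaluators` are required keyword arguments;
# passing them positionally is no longer supported.
result = evaluate(
    data="data.jsonl",  # path to a JSON Lines input file (assumed to exist)
    evaluators={"f1_score": F1ScoreEvaluator()},
)
```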
--- azure_ai_evaluation-1.0.0b1/azure_ai_evaluation.egg-info/PKG-INFO
+++ azure_ai_evaluation-1.0.0b2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.0.0b1
+Version: 1.0.0b2
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -35,11 +35,27 @@ Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "pf-azure"
 
 # Azure AI Evaluation client library for Python
 
+We are excited to introduce the public preview of the Azure AI Evaluation SDK.
+
+[Source code][source_code]
+| [Package (PyPI)][evaluation_pypi]
+| [API reference documentation][evaluation_ref_docs]
+| [Product documentation][product_documentation]
+| [Samples][evaluation_samples]
+
+This package has been tested with Python 3.8, 3.9, 3.10, 3.11, and 3.12.
+
+For a more complete set of Azure libraries, see https://aka.ms/azsdk/python/all
+
 ## Getting started
 
+### Prerequisites
+
+- Python 3.8 or later is required to use this package.
+
 ### Install the package
 
-Install the Azure AI Evaluation library for Python with:
+Install the Azure AI Evaluation library for Python with [pip][pip_link]::
 
 ```bash
 pip install azure-ai-evaluation
@@ -51,6 +67,8 @@ Evaluators are custom or prebuilt classes or functions that are designed to meas
 
 ## Examples
 
+### Evaluators
+
 Users can create evaluator runs on the local machine as shown in the example below:
 
 ```python
@@ -92,9 +110,9 @@ if __name__ == "__main__":
 
 # Initialize Project Scope
 azure_ai_project = {
-    "subscription_id": "e0fd569c-e34a-4249-8c24-e8d723c7f054",
-    "resource_group_name": "rg-test",
-    "project_name": "project-test",
+    "subscription_id": <subscription_id>,
+    "resource_group_name": <resource_group_name>,
+    "project_name": <project_name>
 }
 
 violence_eval = ViolenceEvaluator(azure_ai_project)
@@ -122,9 +140,13 @@ if __name__ == "__main__":
 
 pprint(result)
 ```
-## Simulator
+### Simulator
 
-Sample application prompty
+
+Simulators allow users to generate synthentic data using their application. Simulator expects the user to have a callback method that invokes
+their AI application.
+
+#### Simulating with a Prompty
 
 ```yaml
 ---
@@ -163,7 +185,7 @@ Application code:
 import json
 import asyncio
 from typing import Any, Dict, List, Optional
-from azure.ai.evaluation.synthetic import Simulator
+from azure.ai.evaluation.simulator import Simulator
 from promptflow.client import load_flow
 from azure.identity import DefaultAzureCredential
 import os
@@ -171,8 +193,7 @@ import os
 azure_ai_project = {
     "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
     "resource_group_name": os.environ.get("RESOURCE_GROUP"),
-    "project_name": os.environ.get("PROJECT_NAME"),
-    "credential": DefaultAzureCredential(),
+    "project_name": os.environ.get("PROJECT_NAME")
 }
 
 import wikipedia
@@ -249,8 +270,7 @@ if __name__ == "__main__":
 print("done!")
 ```
 
-Simulators allow users to generate synthentic data using their application. Simulator expects the user to have a callback method that invokes
-their AI application. Here's a sample of a callback which invokes AsyncAzureOpenAI:
+#### Adversarial Simulator
 
 ```python
 from from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
@@ -318,7 +338,9 @@ async def callback(
 }
 
 ```
-### Adversarial QA:
+
+#### Adversarial QA
+
 ```python
 scenario = AdversarialScenario.ADVERSARIAL_QA
 simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
@@ -334,7 +356,7 @@ outputs = asyncio.run(
 
 print(outputs.to_eval_qa_json_lines())
 ```
-### Direct Attack Simulator
+#### Direct Attack Simulator
 
 ```python
 scenario = AdversarialScenario.ADVERSARIAL_QA
@@ -353,13 +375,63 @@ print(outputs)
 ```
 ## Troubleshooting
 
+### General
+
+Azure ML clients raise exceptions defined in [Azure Core][azure_core_readme].
+
+### Logging
+
+This library uses the standard
+[logging][python_logging] library for logging.
+Basic information about HTTP sessions (URLs, headers, etc.) is logged at INFO
+level.
+
+Detailed DEBUG level logging, including request/response bodies and unredacted
+headers, can be enabled on a client with the `logging_enable` argument.
+
+See full SDK logging documentation with examples [here][sdk_logging_docs].
+
 ## Next steps
 
+- View our [samples][evaluation_samples].
+- View our [documentation][product_documentation]
+
 ## Contributing
 
+This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [cla.microsoft.com][cla].
+
+When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
+
+This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information see the [Code of Conduct FAQ][coc_faq] or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments.
+
+<!-- LINKS -->
+
+[source_code]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/evaluation/azure-ai-evaluation
+[evaluation_pypi]: https://pypi.org/project/azure-ai-evaluation/
+[evaluation_ref_docs]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
+[evaluation_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios
+[product_documentation]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk
+[python_logging]: https://docs.python.org/3/library/logging.html
+[sdk_logging_docs]: https://docs.microsoft.com/azure/developer/python/azure-sdk-logging
+[azure_core_readme]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
+[pip_link]: https://pypi.org/project/pip/
+[azure_core_ref_docs]: https://aka.ms/azsdk-python-core-policies
+[azure_core]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
+[azure_identity]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/identity/azure-identity
+[cla]: https://cla.microsoft.com
+[code_of_conduct]: https://opensource.microsoft.com/codeofconduct/
+[coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/
+[coc_contact]: mailto:opencode@microsoft.com
+
 
 # Release History
 
+## 1.0.0b2 (2024-09-24)
+
+### Breaking Changes
+
+- `data` and `evaluators` are now required keywords in `evaluate`.
+
 ## 1.0.0b1 (2024-09-20)
 
 ### Breaking Changes
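The Logging section added above follows the usual Azure SDK convention. A minimal sketch of enabling the DEBUG output it describes, using only the standard `logging` module (the `azure` logger name is the common Azure SDK root logger, not something this diff establishes):

```python
import logging
import sys

# Surface the SDK's HTTP-level diagnostics described in the new Logging section.
azure_logger = logging.getLogger("azure")
azure_logger.setLevel(logging.DEBUG)
azure_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
```

Per the added text, request/response bodies and unredacted headers are only included when a client is constructed with `logging_enable=True`.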
--- azure_ai_evaluation-1.0.0b1/README.md
+++ azure_ai_evaluation-1.0.0b2/README.md
@@ -1,10 +1,26 @@
 # Azure AI Evaluation client library for Python
 
+We are excited to introduce the public preview of the Azure AI Evaluation SDK.
+
+[Source code][source_code]
+| [Package (PyPI)][evaluation_pypi]
+| [API reference documentation][evaluation_ref_docs]
+| [Product documentation][product_documentation]
+| [Samples][evaluation_samples]
+
+This package has been tested with Python 3.8, 3.9, 3.10, 3.11, and 3.12.
+
+For a more complete set of Azure libraries, see https://aka.ms/azsdk/python/all
+
 ## Getting started
 
+### Prerequisites
+
+- Python 3.8 or later is required to use this package.
+
 ### Install the package
 
-Install the Azure AI Evaluation library for Python with:
+Install the Azure AI Evaluation library for Python with [pip][pip_link]::
 
 ```bash
 pip install azure-ai-evaluation
@@ -16,6 +32,8 @@ Evaluators are custom or prebuilt classes or functions that are designed to meas
 
 ## Examples
 
+### Evaluators
+
 Users can create evaluator runs on the local machine as shown in the example below:
 
 ```python
@@ -57,9 +75,9 @@ if __name__ == "__main__":
 
 # Initialize Project Scope
 azure_ai_project = {
-    "subscription_id": "e0fd569c-e34a-4249-8c24-e8d723c7f054",
-    "resource_group_name": "rg-test",
-    "project_name": "project-test",
+    "subscription_id": <subscription_id>,
+    "resource_group_name": <resource_group_name>,
+    "project_name": <project_name>
 }
 
 violence_eval = ViolenceEvaluator(azure_ai_project)
@@ -87,9 +105,13 @@ if __name__ == "__main__":
 
 pprint(result)
 ```
-## Simulator
+### Simulator
+
 
-Sample application prompty
+Simulators allow users to generate synthentic data using their application. Simulator expects the user to have a callback method that invokes
+their AI application.
+
+#### Simulating with a Prompty
 
 ```yaml
 ---
@@ -128,7 +150,7 @@ Application code:
 import json
 import asyncio
 from typing import Any, Dict, List, Optional
-from azure.ai.evaluation.synthetic import Simulator
+from azure.ai.evaluation.simulator import Simulator
 from promptflow.client import load_flow
 from azure.identity import DefaultAzureCredential
 import os
@@ -136,8 +158,7 @@ import os
 azure_ai_project = {
     "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
     "resource_group_name": os.environ.get("RESOURCE_GROUP"),
-    "project_name": os.environ.get("PROJECT_NAME"),
-    "credential": DefaultAzureCredential(),
+    "project_name": os.environ.get("PROJECT_NAME")
 }
 
 import wikipedia
@@ -214,8 +235,7 @@ if __name__ == "__main__":
 print("done!")
 ```
 
-Simulators allow users to generate synthentic data using their application. Simulator expects the user to have a callback method that invokes
-their AI application. Here's a sample of a callback which invokes AsyncAzureOpenAI:
+#### Adversarial Simulator
 
 ```python
 from from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
@@ -283,7 +303,9 @@ async def callback(
 }
 
 ```
-### Adversarial QA:
+
+#### Adversarial QA
+
 ```python
 scenario = AdversarialScenario.ADVERSARIAL_QA
 simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
@@ -299,7 +321,7 @@ outputs = asyncio.run(
 
 print(outputs.to_eval_qa_json_lines())
 ```
-### Direct Attack Simulator
+#### Direct Attack Simulator
 
 ```python
 scenario = AdversarialScenario.ADVERSARIAL_QA
@@ -318,6 +340,50 @@ print(outputs)
 ```
 ## Troubleshooting
 
+### General
+
+Azure ML clients raise exceptions defined in [Azure Core][azure_core_readme].
+
+### Logging
+
+This library uses the standard
+[logging][python_logging] library for logging.
+Basic information about HTTP sessions (URLs, headers, etc.) is logged at INFO
+level.
+
+Detailed DEBUG level logging, including request/response bodies and unredacted
+headers, can be enabled on a client with the `logging_enable` argument.
+
+See full SDK logging documentation with examples [here][sdk_logging_docs].
+
 ## Next steps
 
+- View our [samples][evaluation_samples].
+- View our [documentation][product_documentation]
+
 ## Contributing
+
+This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [cla.microsoft.com][cla].
+
+When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
+
+This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information see the [Code of Conduct FAQ][coc_faq] or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments.
+
+<!-- LINKS -->
+
+[source_code]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/evaluation/azure-ai-evaluation
+[evaluation_pypi]: https://pypi.org/project/azure-ai-evaluation/
+[evaluation_ref_docs]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
+[evaluation_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios
+[product_documentation]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk
+[python_logging]: https://docs.python.org/3/library/logging.html
+[sdk_logging_docs]: https://docs.microsoft.com/azure/developer/python/azure-sdk-logging
+[azure_core_readme]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
+[pip_link]: https://pypi.org/project/pip/
+[azure_core_ref_docs]: https://aka.ms/azsdk-python-core-policies
+[azure_core]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
+[azure_identity]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/identity/azure-identity
+[cla]: https://cla.microsoft.com
+[code_of_conduct]: https://opensource.microsoft.com/codeofconduct/
+[coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/
+[coc_contact]: mailto:opencode@microsoft.com
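Two related sample fixes recur in the README hunks above: the `Simulator` import moves from `azure.ai.evaluation.synthetic` to `azure.ai.evaluation.simulator`, and `DefaultAzureCredential` is no longer stored inside the `azure_ai_project` dict but passed to the simulator directly. A condensed sketch of the corrected pattern (the environment variable names come from the sample itself):

```python
import os

from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialSimulator
from azure.identity import DefaultAzureCredential

azure_ai_project = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("RESOURCE_GROUP"),
    "project_name": os.environ.get("PROJECT_NAME"),
}

# The credential is its own argument now, not a key of azure_ai_project.
simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
```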
--- azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/__init__.py
+++ azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/__init__.py
@@ -25,11 +25,7 @@ from ._evaluators._relevance import RelevanceEvaluator
 from ._evaluators._rouge import RougeScoreEvaluator, RougeType
 from ._evaluators._similarity import SimilarityEvaluator
 from ._evaluators._xpia import IndirectAttackEvaluator
-from ._model_configurations import (
-    AzureAIProject,
-    AzureOpenAIModelConfiguration,
-    OpenAIModelConfiguration,
-)
+from ._model_configurations import AzureAIProject, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 __all__ = [
     "evaluate",
--- azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_common/rai_service.py
+++ azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/_common/rai_service.py
@@ -11,12 +11,12 @@ from urllib.parse import urlparse
 
 import jwt
 import numpy as np
-from azure.core.credentials import TokenCredential
-from azure.identity import DefaultAzureCredential
 
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.core.credentials import TokenCredential
+from azure.identity import DefaultAzureCredential
 
 from .constants import (
     CommonConstants,
@@ -348,7 +348,7 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: str
     )
 
     if response.status_code != 200:
-        msg = f"Failed to retrieve the discovery service URL."
+        msg = "Failed to retrieve the discovery service URL."
         raise EvaluationException(
             message=msg,
             internal_message=msg,
--- azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_common/utils.py
+++ azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/_common/utils.py
@@ -2,20 +2,15 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
-from typing import Optional, Union
-
-from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+import threading
+from typing import List, Optional, Union
 
-try:
-    from . import constants
-except ImportError:
-    import constants
+import nltk
+import numpy as np
 
-from typing import List
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
-import threading
-import numpy as np
-import nltk
+from . import constants
 
 _nltk_data_download_lock = threading.Lock()
 
@@ -46,7 +41,7 @@ def ensure_nltk_data_downloaded():
     """Download NLTK data packages if not already downloaded."""
     with _nltk_data_download_lock:
         try:
-            from nltk.tokenize.nist import NISTTokenizer
+            from nltk.tokenize.nist import NISTTokenizer  # pylint: disable=unused-import
         except LookupError:
            nltk.download("perluniprops")
            nltk.download("punkt")
@@ -54,12 +49,19 @@
 
 
 def nltk_tokenize(text: str) -> List[str]:
-    """Tokenize the input text using the NLTK tokenizer."""
+    """Tokenize the input text using the NLTK tokenizer.
+
+    :param text: The text to tokenize
+    :type text: str
+    :return: A list of tokens
+    :rtype: list[str]
+    """
     ensure_nltk_data_downloaded()
 
     if not text.isascii():
         # Use NISTTokenizer for international tokenization
         from nltk.tokenize.nist import NISTTokenizer
+
         tokens = NISTTokenizer().international_tokenize(text)
     else:
         # By default, use NLTK word tokenizer
@@ -68,20 +70,18 @@ def nltk_tokenize(text: str) -> List[str]:
     return list(tokens)
 
 
-def check_and_add_api_version_for_aoai_model_config(
+def ensure_api_version_in_aoai_model_config(
     model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
     default_api_version: str,
 ) -> None:
-    if (
-        "azure_endpoint" in model_config or "azure_deployment" in model_config
-    ):
+    if "azure_endpoint" in model_config or "azure_deployment" in model_config:
         model_config["api_version"] = model_config.get("api_version", default_api_version)
 
 
-def check_and_add_user_agent_for_aoai_model_config(
+def ensure_user_agent_in_aoai_model_config(
     model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
     prompty_model_config: dict,
     user_agent: Optional[str] = None,
 ) -> None:
     if user_agent and ("azure_endpoint" in model_config or "azure_deployment" in model_config):
-        prompty_model_config["parameters"]["extra_headers"].update({"x-ms-useragent": user_agent})
+        prompty_model_config["parameters"]["extra_headers"].update({"x-ms-useragent": user_agent})
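The renames above drop the `check_and_add_*` prefixes but keep the in-place semantics: each helper mutates the config only when it carries Azure OpenAI keys. An illustrative call against the (private) renamed helper; the endpoint, deployment, and version strings here are made up:

```python
from azure.ai.evaluation._common.utils import ensure_api_version_in_aoai_model_config

model_config = {
    "azure_endpoint": "https://example.openai.azure.com",  # hypothetical endpoint
    "azure_deployment": "my-deployment",                   # hypothetical deployment
}

# api_version is injected because the config targets Azure OpenAI and lacks one.
ensure_api_version_in_aoai_model_config(model_config, default_api_version="2024-02-01")
assert model_config["api_version"] == "2024-02-01"
```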
--- azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_constants.py
+++ azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/_constants.py
@@ -39,6 +39,15 @@ class Prefixes:
     TSG_OUTPUTS = "__outputs."
 
 
+class DefaultOpenEncoding:
+    """Enum that captures SDK's default values for the encoding param of open(...)"""
+
+    READ = "utf-8-sig"
+    """SDK Default Encoding when reading a file"""
+    WRITE = "utf-8"
+    """SDK Default Encoding when writing a file"""
+
+
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
 
 CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
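`DefaultOpenEncoding` centralizes what the SDK passes to `open(...)`: `utf-8-sig` on read, so a UTF-8 byte-order mark is consumed instead of leaking into the first parsed value, and plain `utf-8` on write. A sketch of the intended usage (the file names are illustrative):

```python
from azure.ai.evaluation._constants import DefaultOpenEncoding

# Reading with utf-8-sig silently strips a leading BOM if one is present.
with open("results.jsonl", encoding=DefaultOpenEncoding.READ) as fin:
    lines = fin.readlines()

# Writing stays plain utf-8 so no BOM is ever emitted.
with open("results_copy.jsonl", "w", encoding=DefaultOpenEncoding.WRITE) as fout:
    fout.writelines(lines)
```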
--- azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py
+++ azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py
@@ -5,13 +5,14 @@ import os
 from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from promptflow._utils.user_agent_utils import ClientUserAgentUtil
+from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
+
 
 from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT,
     PF_BATCH_TIMEOUT_SEC,
     PF_BATCH_TIMEOUT_SEC_DEFAULT,
 )
-from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
 
 from ..._user_agent import USER_AGENT
 from .._utils import set_event_loop_policy
--- azure_ai_evaluation-1.0.0b1/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py
+++ azure_ai_evaluation-1.0.0b2/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py
@@ -4,13 +4,16 @@
 import inspect
 import json
 import logging
+import os
+from pathlib import Path
+from typing import Callable, Dict, Optional, Union
 
 import pandas as pd
-
 from promptflow.contracts.types import AttrDict
-from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
 from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+
+from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
 from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
 
@@ -18,7 +21,9 @@ LOGGER = logging.getLogger(__name__)
 
 
 class CodeRun:
-    def __init__(self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs):
+    def __init__(
+        self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs  # pylint: disable=unused-argument
+    ):
         self.run = run
         self.evaluator_name = evaluator_name if evaluator_name is not None else ""
         self.input_data = input_data
@@ -40,13 +45,13 @@ class CodeRun:
                 else None
             )
         except Exception as ex:  # pylint: disable=broad-exception-caught
-            LOGGER.debug(f"Error calculating metrics for evaluator {self.evaluator_name}, failed with error {str(ex)}")
+            LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", self.evaluator_name, ex)
             aggregated_metrics = None
 
         if not isinstance(aggregated_metrics, dict):
             LOGGER.warning(
-                f"Aggregated metrics for evaluator {self.evaluator_name}"
-                f" is not a dictionary will not be logged as metrics"
+                "Aggregated metrics for evaluator %s is not a dictionary will not be logged as metrics",
+                self.evaluator_name,
             )
 
         aggregated_metrics = aggregated_metrics if isinstance(aggregated_metrics, dict) else {}
@@ -54,11 +59,15 @@
         return aggregated_metrics
 
 
-class CodeClient:
-    def __init__(self):
+class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
+    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
+        self,
+    ) -> None:
         self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
 
-    def _calculate_metric(self, evaluator, input_df, column_mapping, evaluator_name):
+    def _calculate_metric(
+        self, evaluator: Callable, input_df: pd.DataFrame, column_mapping: Optional[Dict[str, str]], evaluator_name: str
+    ) -> pd.DataFrame:
         row_metric_futures = []
         row_metric_results = []
         input_df = _apply_column_mapping(input_df, column_mapping)
@@ -110,18 +119,25 @@ class CodeClient:
             return aggregated_output
         except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.warning(
-                f"Error calculating aggregations for evaluator {run.evaluator_name}," f" failed with error {str(ex)}"
+                "Error calculating aggregations for evaluator %s, failed with error %s", run.evaluator_name, ex
             )
             return None
 
-    def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs):
+    def run(
+        self,  # pylint: disable=unused-argument
+        flow: Callable,
+        data: Union[os.PathLike, Path, pd.DataFrame],
+        evaluator_name: Optional[str] = None,
+        column_mapping: Optional[Dict[str, str]] = None,
+        **kwargs,
+    ) -> CodeRun:
         input_df = data
         if not isinstance(input_df, pd.DataFrame):
             try:
                 json_data = load_jsonl(data)
             except json.JSONDecodeError as exc:
                 raise EvaluationException(
-                    message = f"Failed to parse data as JSON: {data}. Provide valid json lines data.",
+                    message=f"Failed to parse data as JSON: {data}. Provide valid json lines data.",
                     internal_message="Failed to parse data as JSON",
                     target=ErrorTarget.CODE_CLIENT,
                     category=ErrorCategory.INVALID_VALUE,
@@ -129,22 +145,28 @@
                 ) from exc
 
             input_df = pd.DataFrame(json_data)
-        eval_future = self._thread_pool.submit(self._calculate_metric, flow, input_df, column_mapping, evaluator_name)
+        eval_future = self._thread_pool.submit(
+            self._calculate_metric,
+            evaluator=flow,
+            input_df=input_df,
+            column_mapping=column_mapping,
+            evaluator_name=evaluator_name,
+        )
         run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
         aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
         run.aggregated_metrics = aggregation_future
         return run
 
-    def get_details(self, run, all_results=False):
+    def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df
 
-    def get_metrics(self, run):
+    def get_metrics(self, run: CodeRun) -> Optional[None]:
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
         except Exception as ex:  # pylint: disable=broad-exception-caught
-            LOGGER.debug(f"Error calculating metrics for evaluator {run.evaluator_name}, failed with error {str(ex)}")
+            LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", run.evaluator_name, ex)
             return None
         return aggregated_metrics
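A pattern worth calling out in this file: every f-string log message becomes a printf-style call, so the logger defers formatting until it knows the record will actually be emitted. A standalone sketch of the before and after (the sample values are placeholders):

```python
import logging

LOGGER = logging.getLogger(__name__)
evaluator_name, ex = "answer_length", ValueError("boom")  # placeholder values

# Before: the f-string is always built, even with DEBUG logging disabled.
LOGGER.debug(f"Error calculating metrics for evaluator {evaluator_name}, failed with error {ex}")

# After: %s formatting happens inside the logger, only for enabled levels.
LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", evaluator_name, ex)
```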