azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation has been flagged as possibly problematic.

Files changed (277)
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluators/_rouge/_rouge.py

@@ -3,12 +3,16 @@
 # ---------------------------------------------------------
 from enum import Enum

-from promptflow._utils.async_utils import async_run_allowing_running_loop
+from typing import Dict, Union
+from typing_extensions import overload, override

 from azure.ai.evaluation._vendor.rouge_score import rouge_scorer
+from azure.ai.evaluation._evaluators._common import EvaluatorBase
+from azure.ai.evaluation._constants import EVALUATION_PASS_FAIL_MAPPING
+import math


-class RougeType(Enum):
+class RougeType(str, Enum):
     """
     Enumeration of ROUGE (Recall-Oriented Understudy for Gisting Evaluation) types.
     """
@@ -32,21 +36,7 @@ class RougeType(Enum):
     """Overlap of L-grams (L consecutive words) between generated and reference text."""


-class _AsyncRougeScoreEvaluator:
-    def __init__(self, rouge_type: RougeType):
-        self._rouge_type = rouge_type
-
-    async def __call__(self, *, ground_truth: str, response: str, **kwargs):
-        scorer = rouge_scorer.RougeScorer(rouge_types=[self._rouge_type.value])
-        metrics = scorer.score(ground_truth, response)[self._rouge_type.value]
-        return {
-            "rouge_precision": metrics.precision,
-            "rouge_recall": metrics.recall,
-            "rouge_f1_score": metrics.fmeasure,
-        }
-
-
-class RougeScoreEvaluator:
+class RougeScoreEvaluator(EvaluatorBase):
     """
     Calculates the ROUGE score for a given response and ground truth.

@@ -62,6 +52,14 @@ class RougeScoreEvaluator:
     information from the reference text.

     ROUGE scores range from 0 to 1, with higher scores indicating better quality.
+    :param rouge_type: The type of ROUGE score to calculate. Default is "rouge1".
+    :type rouge_type: str
+    :param precision_threshold: The threshold value to determine if the precision evaluation passes or fails. Default is 0.5.
+    :type precision_threshold: float
+    :param recall_threshold: The threshold value to determine if the recall evaluation passes or fails. Default is 0.5.
+    :type recall_threshold: float
+    :param f1_score_threshold: The threshold value to determine if the F1 score evaluation passes or fails. Default is 0.5.
+    :type f1_score_threshold: float

     .. admonition:: Example:

@@ -71,15 +69,146 @@ class RougeScoreEvaluator:
             :language: python
             :dedent: 8
             :caption: Initialize and call a RougeScoreEvaluator with a four-gram rouge type.
+
+    .. admonition:: Example using Azure AI Project URL:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START rouge_score_evaluator]
+            :end-before: [END rouge_score_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call RougeScoreEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
+
+    .. admonition:: Example with threshold:
+
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_rouge_score_evaluator]
+            :end-before: [END threshold_rouge_score_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with a specified threshold and call a RougeScoreEvaluator with a four-gram rouge type.
     """

-    id = "azureml://registries/azureml/models/Rouge-Score-Evaluator/versions/3"
+    id = "azureai://built-in/evaluators/rouge_score"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

-    def __init__(self, rouge_type: RougeType):
-        self._async_evaluator = _AsyncRougeScoreEvaluator(rouge_type)
+    @override
+    def __init__(
+        self,
+        rouge_type: RougeType,
+        *,
+        precision_threshold: float = 0.5,
+        recall_threshold: float = 0.5,
+        f1_score_threshold: float = 0.5,
+    ):
+        self._rouge_type = rouge_type
+        self._higher_is_better = True
+        super().__init__()
+
+        # Type checking for threshold parameters
+        for name, value in [
+            ("precision_threshold", precision_threshold),
+            ("recall_threshold", recall_threshold),
+            ("f1_score_threshold", f1_score_threshold),
+        ]:
+            if not isinstance(value, float):
+                raise TypeError(f"{name} must be a float, got {type(value)}")
+
+        self._threshold = {
+            "precision": precision_threshold,
+            "recall": recall_threshold,
+            "f1_score": f1_score_threshold,
+        }

-    def __call__(self, *, ground_truth: str, response: str, **kwargs):
+    def _get_binary_result(
+        self,
+        rouge_precision: float,
+        rouge_recall: float,
+        rouge_f1_score: float,
+    ) -> Dict[str, bool]:
+        """
+        Get binary result based on the threshold.
+
+        :param rouge_precision: The precision score.
+        :type rouge_precision: float
+        :param rouge_recall: The recall score.
+        :type rouge_recall: float
+        :param rouge_f1_score: The F1 score.
+        :type rouge_f1_score: float
+        :return: A dictionary with binary results for precision, recall, and F1 score.
+
+        """
+        # Initialize results with False for NaN values
+        results = {
+            "rouge_precision_result": False,
+            "rouge_recall_result": False,
+            "rouge_f1_score_result": False,
+        }
+
+        # Check if values are valid (not NaN) before comparison
+        precision_valid = not math.isnan(rouge_precision)
+        recall_valid = not math.isnan(rouge_recall)
+        f1_valid = not math.isnan(rouge_f1_score)
+
+        if self._higher_is_better:
+            if precision_valid:
+                results["rouge_precision_result"] = rouge_precision >= self._threshold["precision"]
+            if recall_valid:
+                results["rouge_recall_result"] = rouge_recall >= self._threshold["recall"]
+            if f1_valid:
+                results["rouge_f1_score_result"] = rouge_f1_score >= self._threshold["f1_score"]
+        else:
+            if precision_valid:
+                results["rouge_precision_result"] = rouge_precision <= self._threshold["precision"]
+            if recall_valid:
+                results["rouge_recall_result"] = rouge_recall <= self._threshold["recall"]
+            if f1_valid:
+                results["rouge_f1_score_result"] = rouge_f1_score <= self._threshold["f1_score"]
+
+        return results
+
+    @override
+    async def _do_eval(self, eval_input: Dict) -> Dict[str, float]:
+        """Produce a rouge score evaluation result.
+
+        :param eval_input: The input to the evaluation function.
+        :type eval_input: Dict
+        :return: The evaluation result.
+        :rtype: Dict
+        """
+        ground_truth = eval_input["ground_truth"]
+        response = eval_input["response"]
+        scorer = rouge_scorer.RougeScorer(rouge_types=[self._rouge_type])
+        metrics = scorer.score(ground_truth, response)[self._rouge_type]
+        binary_results = {
+            "rouge_precision_result": False,
+            "rouge_recall_result": False,
+            "rouge_f1_score_result": False,
+        }
+        # Convert metrics to floats, using nan for None or non-convertible values
+        rouge_precision = float(metrics.precision) if metrics.precision is not None else float("nan")
+        rouge_recall = float(metrics.recall) if metrics.recall is not None else float("nan")
+        rouge_f1_score = float(metrics.fmeasure) if metrics.fmeasure is not None else float("nan")
+        binary_results = self._get_binary_result(
+            rouge_precision=rouge_precision,
+            rouge_recall=rouge_recall,
+            rouge_f1_score=rouge_f1_score,
+        )
+        return {
+            "rouge_precision": rouge_precision,
+            "rouge_recall": rouge_recall,
+            "rouge_f1_score": rouge_f1_score,
+            "rouge_precision_result": EVALUATION_PASS_FAIL_MAPPING[binary_results["rouge_precision_result"]],
+            "rouge_recall_result": EVALUATION_PASS_FAIL_MAPPING[binary_results["rouge_recall_result"]],
+            "rouge_f1_score_result": EVALUATION_PASS_FAIL_MAPPING[binary_results["rouge_f1_score_result"]],
+            "rouge_precision_threshold": self._threshold["precision"],
+            "rouge_recall_threshold": self._threshold["recall"],
+            "rouge_f1_score_threshold": self._threshold["f1_score"],
+        }
+
+    @overload  # type: ignore
+    def __call__(self, *, ground_truth: str, response: str) -> Dict[str, float]:
         """
         Evaluate the ROUGE score between the response and the ground truth.

@@ -90,9 +219,20 @@ class RougeScoreEvaluator:
         :return: The ROUGE score.
         :rtype: Dict[str, float]
         """
-        return async_run_allowing_running_loop(
-            self._async_evaluator, ground_truth=ground_truth, response=response, **kwargs
-        )

-    def _to_async(self):
-        return self._async_evaluator
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
+        """
+        Evaluate route score.
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword ground_truth: The ground truth to be compared against.
+        :paramtype ground_truth: str
+        :return: The ROUGE score.
+        :rtype: Dict[str, float]
+        """
+        return super().__call__(*args, **kwargs)
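
Taken together, these hunks replace the promptflow-backed async wrapper with an EvaluatorBase subclass and add per-metric pass/fail thresholds. A minimal usage sketch based only on the signatures shown above; the threshold values and input strings are illustrative, not taken from the package:

    from azure.ai.evaluation import RougeScoreEvaluator, RougeType

    # precision/recall/f1 thresholds are keyword-only floats; 0.5 is the documented default.
    rouge = RougeScoreEvaluator(
        rouge_type=RougeType.ROUGE_L,
        precision_threshold=0.6,
        recall_threshold=0.5,
        f1_score_threshold=0.55,
    )

    result = rouge(
        response="Tokyo is the capital of Japan.",
        ground_truth="The capital of Japan is Tokyo.",
    )
    # Per _do_eval above, result holds the rouge_precision/recall/f1_score floats, the matching
    # *_result entries (booleans mapped through EVALUATION_PASS_FAIL_MAPPING), and *_threshold values.
    print(result)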
azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py

@@ -24,9 +24,11 @@ class GroundednessProEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):

     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
+    :param threshold: The threshold for the groundedness pro evaluator. Default is 5.
+    :type threshold: int
     :param kwargs: Additional arguments to pass to the evaluator.
     :type kwargs: Any

@@ -39,28 +41,52 @@ class GroundednessProEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
             :dedent: 8
             :caption: Initialize and call a GroundednessProEvaluator with a query, response, and context.

+    .. admonition:: Example using Azure AI Project URL:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START groundedness_pro_evaluator]
+            :end-before: [END groundedness_pro_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call GroundednessProEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
+
+    .. admonition:: Example with threshold:
+
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_groundedness_pro_evaluator]
+            :end-before: [END threshold_groundedness_pro_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with a specified threshold and call GroundednessProEvaluator with a query, response, and context.
+
     .. note::

         If this evaluator is supplied to the `evaluate` function, the aggregated metric
         for the groundedness pro label will be "groundedness_pro_passing_rate".
     """

-    id = "azureml://registries/azureml/models/Groundedness-Pro-Evaluator/versions/1"
+    id = "azureai://built-in/evaluators/groundedness_pro"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]

     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        *,
+        threshold: int = 5,
         **kwargs,
     ):
-        self._passing_score = 5  # TODO update once the binarization PR is merged
+        self.threshold = threshold
+        self._higher_is_better = True
         self._output_prefix = "groundedness_pro"
         super().__init__(
             eval_metric=EvaluationMetrics.GROUNDEDNESS,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            threshold=self.threshold,
             **kwargs,
         )

@@ -141,8 +167,13 @@ class GroundednessProEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
         """
         result = await super()._do_eval(eval_input)
         real_result = {}
+        real_result[self._output_prefix + "_reason"] = result[EvaluationMetrics.GROUNDEDNESS + "_reason"]
         real_result[self._output_prefix + "_label"] = (
-            result[EvaluationMetrics.GROUNDEDNESS + "_score"] >= self._passing_score
+            result[EvaluationMetrics.GROUNDEDNESS + "_score"] >= self.threshold
         )
-        real_result[self._output_prefix + "_reason"] = result[EvaluationMetrics.GROUNDEDNESS + "_reason"]
+        if self._higher_is_better:
+            real_result[self._output_prefix + "_score"] = max(result[EvaluationMetrics.GROUNDEDNESS + "_score"], 0)
+        else:
+            real_result[self._output_prefix + "_score"] = min(result[EvaluationMetrics.GROUNDEDNESS + "_score"], 1)
+
         return real_result
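
The constructor now accepts either a project endpoint URL or an AzureAIProject instance, plus a keyword-only threshold that replaces the hard-coded passing score. A minimal sketch, assuming the endpoint-string form from the docstring and DefaultAzureCredential for authentication; the query/response/context values are placeholders:

    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation import GroundednessProEvaluator

    groundedness_pro = GroundednessProEvaluator(
        credential=DefaultAzureCredential(),
        azure_ai_project="https://<resource_name>.services.ai.azure.com/api/projects/<project_name>",
        threshold=5,  # documented default; scores at or above it yield groundedness_pro_label == True
    )

    result = groundedness_pro(
        query="Which tent is the most waterproof?",
        response="The Alpine Explorer Tent is the most waterproof.",
        context="The Alpine Explorer Tent has a rainfly waterproof rating of 3000 mm.",
    )
    # Per _do_eval above, result includes groundedness_pro_label, groundedness_pro_reason,
    # and (new in this version) groundedness_pro_score.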
azure/ai/evaluation/_evaluators/_similarity/_similarity.py

@@ -2,85 +2,17 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

-import math
 import os
-import re
+from typing import Dict

-from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty
+from typing_extensions import overload, override

-from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase

-from ..._common.utils import construct_prompty_model_config, validate_model_config

-try:
-    from ..._user_agent import USER_AGENT
-except ImportError:
-    USER_AGENT = "None"
-
-
-class _AsyncSimilarityEvaluator:
-    # Constants must be defined within eval's directory to be save/loadable
-    _PROMPTY_FILE = "similarity.prompty"
-    _LLM_CALL_TIMEOUT = 600
-    _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
-
-    def __init__(self, model_config: dict):
-        prompty_model_config = construct_prompty_model_config(
-            validate_model_config(model_config),
-            self._DEFAULT_OPEN_API_VERSION,
-            USER_AGENT,
-        )
-
-        current_dir = os.path.dirname(__file__)
-        prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
-        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)
-
-    async def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs):
-        """
-        Evaluate similarity.
-
-        :keyword query: The query to be evaluated.
-        :paramtype query: str
-        :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :keyword ground_truth: The ground truth to be evaluated.
-        :paramtype ground_truth: str
-        :return: The similarity score.
-        :rtype: Dict[str, float]
-        """
-        # Validate input parameters
-        query = str(query or "")
-        response = str(response or "")
-        ground_truth = str(ground_truth or "")
-
-        if not (query.strip() and response.strip() and ground_truth.strip()):
-            msg = "'query', 'response' and 'ground_truth' must be non-empty strings."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                error_category=ErrorCategory.MISSING_FIELD,
-                error_blame=ErrorBlame.USER_ERROR,
-                error_target=ErrorTarget.SIMILARITY_EVALUATOR,
-            )
-
-        # Run the evaluation flow
-        llm_output = await self._flow(
-            query=query, response=response, ground_truth=ground_truth, timeout=self._LLM_CALL_TIMEOUT, **kwargs
-        )
-
-        score = math.nan
-        if llm_output:
-            match = re.search(r"\d", llm_output)
-            if match:
-                score = float(match.group())
-
-        return {"similarity": float(score), "gpt_similarity": float(score)}
-
-
-class SimilarityEvaluator:
+class SimilarityEvaluator(PromptyEvaluatorBase):
     """
-    Evaluates similarity score for a given query, response, and ground truth or a multi-turn conversation.
+    Evaluates similarity score for a given query, response, and ground truth.

     The similarity measure evaluates the likeness between a ground truth sentence (or document) and the
     AI model's generated prediction. This calculation involves creating sentence-level embeddings for both
@@ -96,15 +28,41 @@ class SimilarityEvaluator:
     :param model_config: Configuration for the Azure OpenAI model.
     :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
         ~azure.ai.evaluation.OpenAIModelConfiguration]
+    :param threshold: The threshold for the similarity evaluator. Default is 3.
+    :type threshold: int
+    :param credential: The credential for authenticating to Azure AI service.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
+        This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
+    :paramtype is_reasoning_model: bool

     .. admonition:: Example:

         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
-            :start-after: [START rouge_score_evaluator]
-            :end-before: [END rouge_score_evaluator]
+            :start-after: [START similarity_evaluator]
+            :end-before: [END similarity_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a RougeScoreEvaluator with a four-gram rouge type.
+            :caption: Initialize and call a SimilarityEvaluator with a four-gram rouge type.
+
+    .. admonition:: Example using Azure AI Project URL:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START similarity_evaluator]
+            :end-before: [END similarity_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call SimilarityEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/evaluation_samples_threshold.py
+            :start-after: [START threshold_similarity_evaluator]
+            :end-before: [END threshold_similarity_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize with a threshold and call a SimilarityEvaluator.

     .. note::

@@ -113,13 +71,37 @@ class SimilarityEvaluator:
     however, it is recommended to use the new key moving forward as the old key will be deprecated in the future.
     """

-    id = "azureml://registries/azureml/models/Similarity-Evaluator/versions/3"
+    # Constants must be defined within eval's directory to be save/loadable
+
+    _PROMPTY_FILE = "similarity.prompty"
+    _RESULT_KEY = "similarity"
+
+    id = "azureai://built-in/evaluators/similarity"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

-    def __init__(self, model_config):
-        self._async_evaluator = _AsyncSimilarityEvaluator(model_config)
+    @override
+    def __init__(self, model_config, *, threshold=3, credential=None, **kwargs):
+        current_dir = os.path.dirname(__file__)
+        prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
+        self._threshold = threshold
+        self._higher_is_better = True
+        super().__init__(
+            model_config=model_config,
+            prompty_file=prompty_path,
+            result_key=self._RESULT_KEY,
+            threshold=threshold,
+            credential=credential,
+            _higher_is_better=self._higher_is_better,
+            **kwargs,
+        )

-    def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs):
+    # Ignoring a mypy error about having only 1 overload function.
+    # We want to use the overload style for all evals, even single-inputs. This is both to make
+    # refactoring to multi-input styles easier, stylistic consistency consistency across evals,
+    # and due to the fact that non-overloaded syntax now causes various parsing issues that
+    # we don't want to deal with.
+    @overload  # type: ignore
+    def __call__(self, *, query: str, response: str, ground_truth: str) -> Dict[str, float]:
         """
         Evaluate similarity.

@@ -132,9 +114,23 @@ class SimilarityEvaluator:
         :return: The similarity score.
         :rtype: Dict[str, float]
         """
-        return async_run_allowing_running_loop(
-            self._async_evaluator, query=query, response=response, ground_truth=ground_truth, **kwargs
-        )

-    def _to_async(self):
-        return self._async_evaluator
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
+        """
+        Evaluate similarity.
+
+        :keyword query: The query to be evaluated.
+        :paramtype query: str
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword ground_truth: The ground truth to be evaluated.
+        :paramtype ground_truth: str
+        :return: The similarity score.
+        :rtype: Dict[str, float]
+        """
+        return super().__call__(*args, **kwargs)
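
SimilarityEvaluator now derives from PromptyEvaluatorBase and accepts an integer threshold (default 3) and an optional credential. A minimal sketch, assuming an Azure OpenAI model configuration dict of the shape documented for model_config; the endpoint, deployment, and key values are placeholders:

    from azure.ai.evaluation import SimilarityEvaluator

    model_config = {
        "azure_endpoint": "https://<your-resource>.openai.azure.com",
        "azure_deployment": "<your-gpt-deployment>",
        "api_key": "<your-api-key>",
    }

    similarity = SimilarityEvaluator(model_config=model_config, threshold=3)

    result = similarity(
        query="What is the capital of Japan?",
        response="The capital of Japan is Tokyo.",
        ground_truth="Tokyo is Japan's capital city.",
    )
    # The retained docstring note indicates "similarity" is the primary output key, with the
    # legacy "gpt_similarity" key still emitted but slated for deprecation.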
azure/ai/evaluation/_evaluators/_task_adherence/__init__.py (new file)

@@ -0,0 +1,7 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from ._task_adherence import TaskAdherenceEvaluator
+
+__all__ = ["TaskAdherenceEvaluator"]
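
This hunk only adds the sub-package's __init__.py, so the new evaluator becomes importable from it; its constructor and call signatures live in _task_adherence.py (entry 144 above), which is not shown here. It is presumably also re-exported from the top-level azure.ai.evaluation namespace via the __init__.py change in entry 1, but that hunk is likewise not shown:

    # Import path made available by the new __init__.py shown above.
    from azure.ai.evaluation._evaluators._task_adherence import TaskAdherenceEvaluator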