azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show
  1. azure/ai/evaluation/__init__.py +85 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +147 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +87 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
  155. azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
  156. azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
  157. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  158. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  159. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  160. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  161. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  162. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  163. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  165. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
  264. azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,477 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # This contains code merged together from the following files:
6
+ # promptflow-devkit/promptflow/batch/_batch_engine.py
7
+ # promptflow-devkit/promptflow/_proxy/_python_executor_proxy.py
8
+ # promptflow-core/promptflow/executor/_script_executor.py
9
+ # TODO ralphe: The way this code does batch execution needs to be improved. For now
10
+ # porting over the code largely as is to remove the Promptflow dependency
11
+ # as quickly as possible. In phase 2 this code will be heavily refactored.
12
+
13
+ import inspect
14
+ import re
15
+ import asyncio
16
+
17
+ from math import floor
18
+ from asyncio import Semaphore
19
+ from concurrent.futures import Executor
20
+ from functools import partial
21
+ from contextlib import contextmanager
22
+ from datetime import datetime, timezone
23
+ from typing import (
24
+ Any,
25
+ Callable,
26
+ Dict,
27
+ Final,
28
+ Generator,
29
+ List,
30
+ Mapping,
31
+ MutableMapping,
32
+ Optional,
33
+ Sequence,
34
+ Set,
35
+ Tuple,
36
+ cast,
37
+ Literal,
38
+ )
39
+ from uuid import uuid4
40
+
41
+ from ._config import BatchEngineConfig
42
+ from ._utils import DEFAULTS_KEY, get_int_env_var, get_value_from_path, is_async_callable
43
+ from ._status import BatchStatus
44
+ from ._result import BatchResult, BatchRunDetails, BatchRunError, TokenMetrics
45
+ from ._run_storage import AbstractRunStorage, NoOpRunStorage
46
+ from .._common._logging import log_progress, logger, NodeLogManager
47
+ from ..._exceptions import ErrorBlame, EvaluationException
48
+ from ._exceptions import (
49
+ BatchEngineCanceledError,
50
+ BatchEngineError,
51
+ BatchEngineRunFailedError,
52
+ BatchEngineTimeoutError,
53
+ BatchEngineValidationError,
54
+ )
55
+ from ._utils_deprecated import (
56
+ async_run_allowing_running_loop,
57
+ convert_eager_flow_output_to_dict,
58
+ )
59
+ from ._openai_injector import CaptureOpenAITokenUsage
60
+
61
+
62
# Worker-count ceiling. NOTE(review): not referenced in the visible part of this
# module (the engine reads its concurrency from the config) -- confirm it is still used.
MAX_WORKER_COUNT: Final[int] = 10

# Recognises a column-mapping reference of the form "${path}" spanning the whole
# string, and captures the path between the braces (which must not contain braces).
KEYWORD_PATTERN: Final = re.compile(r"^\${([^{}]+)}$")
64
+
65
+
66
+ class BatchEngine:
67
+ """This class is used to execute flows in batch mode"""
68
+
69
def __init__(
    self,
    func: Callable,
    *,
    config: BatchEngineConfig,
    storage: Optional[AbstractRunStorage] = None,
    executor: Optional[Executor] = None,
):
    """Build a batch engine around a single callable.

    :param Callable func: The function to run the flow
    :param BatchEngineConfig config: The configuration for the batch engine
    :param Optional[AbstractRunStorage] storage: Where execution results are stored;
        a NoOpRunStorage is used when nothing (or a falsy value) is supplied
    :param Optional[Executor] executor: The executor to run the flow (if needed)
    """

    # Collaborators handed in by the caller.
    self._func: Callable = func
    self._config: BatchEngineConfig = config
    self._storage: AbstractRunStorage = storage if storage else NoOpRunStorage()
    self._executor: Optional[Executor] = executor

    # Limits copied out of the configuration once, at construction time.
    self._batch_timeout_sec = config.batch_timeout_seconds
    self._line_timeout_sec = config.line_timeout_seconds
    self._max_worker_count = config.max_concurrency

    # Flipped by cancel(); polled while a batch is running.
    self._is_canceled: bool = False
95
+
96
async def run(
    self,
    data: Sequence[Mapping[str, Any]],
    column_mapping: Optional[Mapping[str, str]],
    *,
    id: Optional[str] = None,
    max_lines: Optional[int] = None,
) -> BatchResult:
    """Map the input rows onto the function's parameters and execute them as one batch.

    :param data: The input rows; must not be empty
    :param column_mapping: Optional mapping from parameter name to a literal or a
        "${...}" reference into each row
    :keyword id: Identifier for the run; a random UUID string is generated when omitted
    :keyword max_lines: Optional cap on the number of rows that are processed
    :return: The aggregated result of the batch
    :raises BatchEngineValidationError: If there is no data, or mapping yields nothing to process
    :raises BatchEngineError: If anything other than an EvaluationException escapes the run
    """
    if not data:
        raise BatchEngineValidationError("Please provide a non-empty data mapping.")

    start_time = datetime.now(timezone.utc)

    batch_inputs = self._apply_column_mapping(data, column_mapping, max_lines)
    # Reject the batch when no line ended up with at least one mapped input.
    if not batch_inputs or not any(len(line) != 0 for line in batch_inputs):
        raise BatchEngineValidationError("No data to process.")

    try:
        run_id = id or str(uuid4())
        return await self._exec_in_task(run_id, batch_inputs, start_time)
    except EvaluationException:
        # Already a well-formed SDK error: let it through untouched.
        raise
    except Exception as ex:
        raise BatchEngineError(
            "Unexpected error while running the batch run.", blame=ErrorBlame.SYSTEM_ERROR
        ) from ex
123
+
124
def cancel(self):
    """Request cancellation by raising the flag the running batch polls.

    This only sets the flag; nothing is interrupted directly from here.
    """
    # TODO ralphe: Make sure this works
    self._is_canceled = True
127
+
128
+ def _apply_column_mapping(
129
+ self,
130
+ data: Sequence[Mapping[str, Any]],
131
+ column_mapping: Optional[Mapping[str, str]],
132
+ max_lines: Optional[int],
133
+ ) -> Sequence[Mapping[str, str]]:
134
+
135
+ resolved_column_mapping: Mapping[str, str] = self._resolve_column_mapping(column_mapping)
136
+ resolved_column_mapping.update(self._generate_defaults_for_column_mapping())
137
+ return self._apply_column_mapping_to_lines(data, resolved_column_mapping, max_lines)
138
+
139
+ def _resolve_column_mapping(
140
+ self,
141
+ column_mapping: Optional[Mapping[str, str]],
142
+ ) -> Mapping[str, str]:
143
+ parameters = inspect.signature(self._func).parameters
144
+ default_column_mapping: Dict[str, str] = {
145
+ name: f"${{data.{name}}}"
146
+ for name, value in parameters.items()
147
+ if name not in ["self", "cls", "args", "kwargs"]
148
+ }
149
+ resolved_mapping: Dict[str, str] = default_column_mapping.copy()
150
+
151
+ for name, value in parameters.items():
152
+ if value and value.default is not inspect.Parameter.empty:
153
+ resolved_mapping.pop(name)
154
+
155
+ resolved_mapping.update(column_mapping or {})
156
+ return resolved_mapping
157
+
158
def _generate_defaults_for_column_mapping(self) -> Mapping[Literal["$defaults$"], Any]:
    """Collect the default values declared in the wrapped function's signature.

    :return: A single-entry mapping whose key is DEFAULTS_KEY and whose value maps
        each parameter that has a default to that default value
    """
    defaults: Dict[str, Any] = {}
    for name, parameter in inspect.signature(self._func).parameters.items():
        if parameter.default is inspect.Parameter.empty:
            # Parameter has no default: nothing to record.
            continue
        defaults[name] = parameter.default

    return {DEFAULTS_KEY: defaults}
167
+
168
@staticmethod
def _apply_column_mapping_to_lines(
    data: Sequence[Mapping[str, Any]],
    column_mapping: Mapping[str, str],
    max_lines: Optional[int],
) -> Sequence[Mapping[str, Any]]:
    """Apply a resolved column mapping to each input row.

    For every mapping entry other than the DEFAULTS_KEY entry:

    * a non-string value is used as a literal;
    * a string that is not of the form ``${path}`` is used as a literal;
    * a ``${path}`` reference is looked up in the row first and, if not found
      there, in the defaults stored under DEFAULTS_KEY.

    :param data: The input rows
    :param column_mapping: The resolved mapping, optionally including a DEFAULTS_KEY entry
    :param max_lines: When truthy, only the first ``max_lines`` rows are processed;
        None or 0 processes every row
    :return: One mapped input dictionary per processed row, in input order
    :raises BatchEngineValidationError: If a referenced path cannot be resolved for a row
    """
    data = data[:max_lines] if max_lines else data

    inputs: List[Mapping[str, Any]] = []
    defaults = cast(Mapping[str, Any], column_mapping.get(DEFAULTS_KEY, {}))

    # Line numbers in error messages are 1-based.
    for line_number, line in enumerate(data, start=1):
        mapped: Dict[str, Any] = {}
        missing_inputs: Set[str] = set()

        for key, value in column_mapping.items():
            if key == DEFAULTS_KEY:
                # Skip the defaults key
                continue

            if not isinstance(value, str):
                # All non-string values are literal values.
                mapped[key] = value
                continue

            match = KEYWORD_PATTERN.search(value)
            if match is None:
                # Literal string value
                mapped[key] = value
                continue

            dict_path = match.group(1)
            found, mapped_value = get_value_from_path(dict_path, line)
            if not found:  # try default value
                found, mapped_value = get_value_from_path(dict_path, defaults)

            if found:
                mapped[key] = mapped_value
            else:
                missing_inputs.add(dict_path)

        if missing_inputs:
            # Sort so the message is stable: set iteration order for strings can
            # differ from one interpreter run to the next.
            missing = ", ".join(sorted(missing_inputs))
            raise BatchEngineValidationError(f"Missing inputs for line {line_number}: '{missing}'")

        inputs.append(mapped)

    return inputs
216
+
217
async def _exec_in_task(
    self, run_id: str, batch_inputs: Sequence[Mapping[str, Any]], start_time: datetime
) -> BatchResult:
    """Run the batch in a background task while watching for cancellation and timeout.

    The batch task writes per-line results into a shared dictionary. Once the task
    finishes, is canceled, or the batch timeout expires, the collected results are
    assembled (in input order) into a BatchResult; lines with no result are reported
    as failed.

    :param run_id: Identifier of the run
    :param batch_inputs: The mapped inputs, one entry per line
    :param start_time: When the run started
    :return: The aggregated batch result
    """
    # Since the batch execution is not guaranteed to be completed in the same order
    # as the inputs, we keep track of these in a mapping from index to result
    results: Dict[int, BatchRunDetails] = {}
    status: BatchStatus = BatchStatus.Completed
    error: Optional[Exception] = None

    task = asyncio.create_task(self._exec_batch(run_id, batch_inputs, start_time, results))

    while not task.done():
        # Wait for the task, but wake at least every 1s to poll for cancel/timeout.
        # Unlike a plain sleep this returns as soon as the task finishes, so a fast
        # batch is not held back for a full second.
        await asyncio.wait({task}, timeout=1)
        if self._is_canceled:
            task.cancel()
            # use current completed line results and aggregation results to create a BatchResult
            status = BatchStatus.Canceled
            error = BatchEngineCanceledError("The batch run is canceled by user.")
            break
        elif self._batch_timeout_expired(start_time):
            task.cancel()
            status = BatchStatus.Failed
            error = BatchEngineTimeoutError(
                f"The batch run failed due to timeout [{self._batch_timeout_sec}s]. "
                f"Please adjust the timeout to a higher value."
            )
            break

    # If the batch task itself crashed, retrieve its exception so it is neither
    # silently dropped nor reported by asyncio as "never retrieved".
    if task.done() and not task.cancelled():
        crash = task.exception()
        if crash is not None:
            if not isinstance(crash, Exception):
                raise crash
            if error is None:
                status = BatchStatus.Failed
                error = crash

    end_time = datetime.now(timezone.utc)
    metrics = TokenMetrics(0, 0, 0)
    failed_lines: int = 0

    # generate the details in the same order as the inputs and fill in the missing results
    # with a failed status
    result_details = [
        (
            results[i]
            if i in results
            else BatchRunDetails(
                id=BatchRunDetails.create_id(run_id, i),
                status=BatchStatus.Failed,
                result=None,
                start_time=None,
                end_time=None,
                tokens=TokenMetrics(0, 0, 0),
                error=BatchRunError("The line run is not completed.", None),
                index=i,
            )
        )
        for i in range(len(batch_inputs))
    ]
    # NOTE(review): handle_line_failures is defined outside the visible chunk.
    self.handle_line_failures(result_details)

    for line_result in result_details:
        # Indicate the worst status of the batch run. This works because
        # canceled and failed have a higher value than completed.
        status = max(status, line_result.status)
        if BatchStatus.is_failed(line_result.status):
            failed_lines += 1
        if line_result.tokens:
            metrics.prompt_tokens += line_result.tokens.prompt_tokens
            metrics.completion_tokens += line_result.tokens.completion_tokens
            metrics.total_tokens += line_result.tokens.total_tokens

    if failed_lines and not error:
        error_message = f"{floor(failed_lines / len(batch_inputs) * 100)}% of the batch run failed."
        first_exception: Optional[Exception] = next(
            (result.error.exception for result in result_details if result.error and result.error.exception),
            None,
        )
        if first_exception is not None:
            error_message += f" {first_exception}"

        error = BatchEngineRunFailedError(error_message)

    return BatchResult(
        status=status,
        total_lines=len(batch_inputs),
        failed_lines=failed_lines,
        start_time=start_time,
        end_time=end_time,
        tokens=metrics,
        details=result_details,
        error=error,
    )
303
+
304
async def _exec_batch(
    self,
    run_id: str,
    batch_inputs: Sequence[Mapping[str, Any]],
    start_time: datetime,
    results: MutableMapping[int, BatchRunDetails],
) -> None:
    """Execute every line of the batch, limited by a semaphore, and collect the results.

    One task is created per line. As tasks finish, their results are persisted,
    written into ``results`` keyed by line index, and progress is logged.

    :param run_id: Identifier of the run
    :param batch_inputs: The mapped inputs, one entry per line
    :param start_time: When the run started (used for progress logging)
    :param results: Output mapping from line index to line result; filled in place
    """
    semaphore: Semaphore = Semaphore(self._max_worker_count)

    # TODO ralphe: This async code needs to refactored to use e.g. asyncio.gather, or
    #              asyncio.as_completed.
    async def create_under_semaphore(index: int, inputs: Mapping[str, Any]):
        async with semaphore:
            return await self._exec_line_async(run_id, inputs, index)

    pending = [
        asyncio.create_task(create_under_semaphore(index, inputs)) for index, inputs in enumerate(batch_inputs)
    ]

    total_lines: int = len(batch_inputs)
    completed_lines: int = 0
    try:
        while completed_lines < total_lines:
            # TODO ralphe: Fix this code so it doesn't re-order the outputs
            # wait for any task to complete
            done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
            completed_line_results = [task.result() for task in done]
            # persist node run infos and flow run info in line result to storage
            self._persist_run_info([result for _, result in completed_line_results])
            results.update({index: result for index, result in completed_line_results})
            # update the progress log
            completed_lines += len(completed_line_results)
            log_progress(
                run_start_time=start_time,
                total_count=total_lines,
                current_count=completed_lines,
                # TODO ralphe: set logger to use here
            )
    finally:
        # If this coroutine is cancelled or fails part-way, stop the line tasks
        # that have not been collected yet so they do not keep running in the
        # background. After a normal finish nothing is left to cancel.
        for leftover in pending:
            leftover.cancel()
342
+
343
+ def __preprocess_inputs(self, inputs: Mapping[str, Any]) -> Mapping[str, Any]:
344
+
345
+ func_params = inspect.signature(self._func).parameters
346
+
347
+ has_kwargs = any(p.kind == p.VAR_KEYWORD for p in func_params.values())
348
+
349
+ if has_kwargs:
350
+ return inputs
351
+ else:
352
+ filtered_params = {key: value for key, value in inputs.items() if key in func_params}
353
+ return filtered_params
354
+
355
async def _exec_line_async(
    self,
    run_id: str,
    inputs: Mapping[str, Any],
    index: int,
) -> Tuple[int, BatchRunDetails]:
    """Execute the target callable for one input line and record the outcome.

    Exceptions derived from ``Exception`` raised by the callable are not propagated;
    they are stored on the returned details as a ``BatchRunError`` and the line is
    marked as failed.

    :param run_id: Identifier of the batch run; combined with ``index`` to build the line id.
    :type run_id: str
    :param inputs: Raw inputs for this line; filtered to what the callable accepts before the call.
    :type inputs: Mapping[str, Any]
    :param index: Position of this line within the batch.
    :type index: int
    :return: ``(index, details)`` with the line's status, result, tokens, error and timestamps.
    :rtype: Tuple[int, BatchRunDetails]
    """
    # Imported here so the method does not depend on the module-level import block.
    import contextvars

    with self._exec_line_context(run_id, index):
        details: BatchRunDetails = BatchRunDetails(
            id=f"{run_id}_{index}",
            status=BatchStatus.NotStarted,
            result=None,
            start_time=datetime.now(timezone.utc),
            end_time=None,
            tokens=TokenMetrics(0, 0, 0),
            error=None,
            index=index,
        )

        try:
            # TODO ralphe: Handle line timeouts here
            with CaptureOpenAITokenUsage() as captured_tokens:
                # NOTE: In the legacy code, any synchronous functions were executed in a different process
                #       for isolation reasons. However this isolation was violated in the way the code was
                #       used by the evaluation SDK (e.g. you need to have the module already loaded to pass the
                #       callable into the batch engine, so starting a new process to examine it was redundant).
                #       It also came with performance and memory usage costs (each line was processed in a
                #       separate process up to a maximum of 4), and these processes were created and torn down
                #       too frequently.
                #       For now we will just run the function in the current process, but in the future we may
                #       want to consider running the function in a separate process for isolation reasons.
                output: Any

                processed_inputs = self.__preprocess_inputs(inputs)
                if is_async_callable(self._func):
                    output = await self._func(**processed_inputs)
                else:
                    # to maximize the parallelism, we run the synchronous function in a separate thread
                    # and await its result.
                    # run_in_executor() does not carry context variables over to the worker thread, so
                    # the callable is run inside a copy of the current context. Without this, token
                    # usage recorded in the thread would not reach the counter installed by
                    # CaptureOpenAITokenUsage for this line.
                    line_context = contextvars.copy_context()
                    output = await asyncio.get_running_loop().run_in_executor(
                        self._executor,
                        partial(line_context.run, partial(self._func, **processed_inputs)),
                    )

                # This should in theory never happen but as an extra precaution, let's check if the output
                # is awaitable and await it if it is.
                if inspect.isawaitable(output):
                    output = await output

            # captured_tokens is only filled in when the capture context exits, so it is read here,
            # after the ``with`` block has closed.
            details.status = BatchStatus.Completed
            details.result = convert_eager_flow_output_to_dict(output)
            details.tokens.update(captured_tokens)
        except Exception as ex:
            details.status = BatchStatus.Failed
            details.error = BatchRunError(
                f"Error while evaluating single input: {ex.__class__.__name__}: {str(ex)}", ex
            )
        finally:
            details.end_time = datetime.now(timezone.utc)

    return index, details
414
+
415
@staticmethod
def handle_line_failures(run_infos: List[BatchRunDetails], raise_on_line_failure: bool = False):
    """Report the failed lines of a batch run, optionally raising on the first failure.

    Does nothing when no line has the ``Failed`` status.

    :param run_infos: Details of every line in the batch run.
    :type run_infos: List[BatchRunDetails]
    :param raise_on_line_failure: When True, raise instead of logging the failure summary.
    :type raise_on_line_failure: bool
    :raises Exception: If ``raise_on_line_failure`` is True and at least one line failed.
    """
    failed_run_infos: List[BatchRunDetails] = [r for r in run_infos if r.status == BatchStatus.Failed]
    if not failed_run_infos:
        return

    first_failure = failed_run_infos[0]
    # A failed line is expected to carry an error, but the field is optional; guard against a
    # missing one so that reporting a failure cannot itself fail with an AttributeError.
    first_fail_exception: str = first_failure.error.details if first_failure.error is not None else "Unknown error"

    if raise_on_line_failure:
        raise Exception("Flow run failed due to the error: " + first_fail_exception)

    failed_indexes = ",".join(str(r.index) for r in failed_run_infos)
    failed_msg = (
        f"{len(failed_run_infos)}/{len(run_infos)} flow run failed, indexes: [{failed_indexes}],"
        f" exception of index {first_failure.index}: {first_fail_exception}"
    )
    logger.error(failed_msg)
432
+
433
def _persist_run_info(self, line_results: Sequence[BatchRunDetails]):
    """Hook for persisting the details of completed lines; currently does nothing.

    :param line_results: Details of the lines that have just completed.
    :type line_results: Sequence[BatchRunDetails]
    """
    # TODO ralphe: implement?
    return None
436
+
437
+ def _batch_timeout_expired(self, start_time: datetime) -> bool:
438
+ if self._batch_timeout_sec is None:
439
+ return False
440
+ return (datetime.now(timezone.utc) - start_time).total_seconds() > self._batch_timeout_sec
441
+
442
@contextmanager
def _exec_line_context(self, run_id: str, line_number: int) -> Generator[None, Any, None]:
    """Context manager wrapping the execution of a single line.

    Creates a ``NodeLogManager`` whose node context is set from ``run_id``, the literal
    node name ``"Flex"`` and ``line_number``, and keeps it active, together with the
    operation context, while the caller's block runs.

    :param run_id: Identifier of the batch run.
    :type run_id: str
    :param line_number: Index of the line being executed.
    :type line_number: int
    """
    # TODO ralphe: Do proper tracing and logging here
    node_logs = NodeLogManager()
    node_logs.set_node_context(run_id, "Flex", line_number)
    with node_logs:
        with self._update_operation_context(run_id, line_number):
            yield
449
+
450
@contextmanager
def _update_operation_context(self, run_id: str, line_number: int) -> Generator[None, Any, None]:
    """Context manager intended to set up the operation context for one line.

    At present this only yields: ``run_id`` and ``line_number`` are accepted but not
    used, and no state is changed on entry or exit. The earlier implementation is kept
    below as commented-out code for reference.

    :param run_id: Identifier of the batch run (currently unused).
    :type run_id: str
    :param line_number: Index of the line being executed (currently unused).
    :type line_number: int
    """
    # operation_context = OperationContext.get_instance()
    # original_context = operation_context.copy()
    # original_mode = operation_context.get("run_mode", RunMode.Test.name)
    # values_for_context = {"flow_id": self._flow_id, "root_run_id": run_id}
    # if original_mode == RunMode.Batch.name:
    #     values_for_otel = {
    #         "batch_run_id": run_id,
    #         "line_number": line_number,
    #     }
    # else:
    #     values_for_otel = {"line_run_id": run_id}
    # try:
    #     append_promptflow_package_ua(operation_context)
    #     operation_context.set_execution_target(execution_target=self._execution_target)
    #     operation_context.set_default_tracing_keys(DEFAULT_TRACING_KEYS)
    #     operation_context.run_mode = original_mode
    #     operation_context.update(values_for_context)
    #     for k, v in values_for_otel.items():
    #         operation_context._add_otel_attributes(k, v)
    #     # Inject OpenAI API to make sure traces and headers injection works and
    #     # update OpenAI API configs from environment variables.
    #     inject_openai_api()
    yield

    # finally:
    #     OperationContext.set_instance(original_context)
@@ -0,0 +1,88 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from ..._exceptions import ErrorCategory, ErrorBlame, ErrorTarget, EvaluationException
6
+
7
+
8
class BatchEngineError(EvaluationException):
    """Base exception for errors caused by or raised in the batch engine.

    Unless the caller supplies them, ``category`` defaults to ``FAILED_EXECUTION``,
    ``target`` to ``EVAL_RUN`` and ``blame`` to ``UNKNOWN``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        fallback = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "target": ErrorTarget.EVAL_RUN,
            "blame": ErrorBlame.UNKNOWN,
        }
        # Values passed by the caller take precedence over the fallbacks.
        super().__init__(message, **{**fallback, **kwargs})
23
+
24
+
25
class BatchEngineValidationError(BatchEngineError):
    """Exception raised when validation fails.

    Unless the caller supplies them, ``category`` defaults to ``INVALID_VALUE`` and
    ``blame`` to ``USER_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        fallback = {
            "category": ErrorCategory.INVALID_VALUE,
            "blame": ErrorBlame.USER_ERROR,
        }
        # Values passed by the caller take precedence over the fallbacks.
        super().__init__(message, **{**fallback, **kwargs})
36
+
37
+
38
class BatchEngineTimeoutError(BatchEngineError):
    """Exception raised when a batch engine operation times out.

    Unless the caller supplies them, ``category`` defaults to ``FAILED_EXECUTION`` and
    ``blame`` to ``SYSTEM_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        fallback = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
        }
        # Values passed by the caller take precedence over the fallbacks.
        super().__init__(message, **{**fallback, **kwargs})
49
+
50
+
51
class BatchEngineCanceledError(BatchEngineError):
    """Exception raised when a batch engine operation is canceled.

    Unless the caller supplies them, ``category`` defaults to ``FAILED_EXECUTION`` and
    ``blame`` to ``USER_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        fallback = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.USER_ERROR,
        }
        # Values passed by the caller take precedence over the fallbacks.
        super().__init__(message, **{**fallback, **kwargs})
62
+
63
+
64
class BatchEngineRunFailedError(BatchEngineError):
    """Exception raised when a batch engine run fails.

    Unless the caller supplies them, ``category`` defaults to ``FAILED_EXECUTION`` and
    ``blame`` to ``SYSTEM_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        fallback = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
        }
        # Values passed by the caller take precedence over the fallbacks.
        super().__init__(message, **{**fallback, **kwargs})
75
+
76
+
77
class BatchEnginePartialError(BatchEngineError):
    """Exception raised when a batch engine run has some successful lines, mixed in
    with some failures.

    Unless the caller supplies them, ``category`` defaults to ``FAILED_EXECUTION`` and
    ``blame`` to ``SYSTEM_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        fallback = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
        }
        # Values passed by the caller take precedence over the fallbacks.
        super().__init__(message, **{**fallback, **kwargs})
@@ -0,0 +1,132 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # Original source code: promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
6
+
7
+ import functools
8
+ import importlib
9
+ import logging
10
+
11
+ from contextvars import ContextVar
12
+ from typing import Any, Callable, Final, Generator, Optional, Protocol, Sequence, Tuple
13
+
14
+ from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
15
+ from azure.ai.evaluation._legacy._batch_engine._result import TokenMetrics
16
+
17
+
18
# Context variable holding the TokenMetrics that the wrapped OpenAI methods add usage to.
# NOTE(review): the default TokenMetrics is created once at import time, so every context that
# never calls ``set`` reads (and increments) that same shared object - confirm this is intended.
_token_metrics: ContextVar[TokenMetrics] = ContextVar("token_metrics", default=TokenMetrics(0, 0, 0))
# Name of the attribute under which a wrapper method keeps a reference to the method it replaced.
KEY_ATTR_ORIGINAL: Final[str] = "_original"
20
+
21
+
22
class _TokenMetrics(Protocol):
    """Protocol class to represent the token metrics.

    Structural type for a usage object exposing three integer token counters.
    """

    # Number of tokens in the prompt.
    prompt_tokens: int
    # Number of tokens in the completion.
    completion_tokens: int
    # Total number of tokens reported for the call.
    total_tokens: int
28
+
29
+
30
class _WithUsage(Protocol):
    """Protocol class to represent an OpenAI object that may have a token usage property/attribute."""

    # Token usage reported with the result; None when no usage was reported.
    usage: Optional[_TokenMetrics]
34
+
35
+
36
+ def _wrap_openai_api_method(method: Callable, is_async: bool) -> Callable:
37
+ """Wraps the OpenAI API method to inject logic to run on the result of the call."""
38
+
39
+ def update_usage(result: _WithUsage) -> None:
40
+ if hasattr(result, "usage") and result.usage is not None:
41
+ usage = _token_metrics.get()
42
+ usage.prompt_tokens += result.usage.prompt_tokens
43
+ usage.completion_tokens += result.usage.completion_tokens
44
+ usage.total_tokens += result.usage.total_tokens
45
+
46
+ if is_async:
47
+
48
+ @functools.wraps(method)
49
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
50
+ result: _WithUsage = await method(*args, **kwargs)
51
+ update_usage(result)
52
+ return result
53
+
54
+ return async_wrapper
55
+ else:
56
+
57
+ @functools.wraps(method)
58
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
59
+ result: _WithUsage = method(*args, **kwargs)
60
+ update_usage(result)
61
+ return result
62
+
63
+ return sync_wrapper
64
+
65
+
66
def _openai_api_list() -> Generator[Tuple[Any, Callable, bool], None, None]:
    """Load the list of OpenAI API classes and their corresponding method names.

    Entries whose class or method cannot be found on the imported module are skipped.

    :return: A generator of ``(class, method, is_async)`` tuples, where ``method`` is the
        attribute currently found on the class under the listed method name.
    :rtype: Generator[Tuple[Any, Callable, bool], None, None]
    :raises MissingRequiredPackage: If one of the listed modules cannot be imported.
    """

    apis: Sequence[Tuple[str, str, str, bool]] = [
        ("openai.resources.chat", "Completions", "create", False),
        ("openai.resources.chat", "AsyncCompletions", "create", True),
        ("openai.resources", "Completions", "create", False),
        ("openai.resources", "AsyncCompletions", "create", True),
        ("openai.resources", "Embeddings", "create", False),
        ("openai.resources", "AsyncEmbeddings", "create", True),
        ("openai.resources", "Responses", "create", False),
        ("openai.resources", "AsyncResponses", "create", True),
    ]

    for module_name, class_name, method_name, is_async in apis:
        # Only the lookup is guarded; the yield stays outside the try block so the handlers
        # cover nothing but the import and attribute access.
        try:
            module = importlib.import_module(module_name)
            cls = getattr(module, class_name, None)
            method = None if cls is None else getattr(cls, method_name, None)
        except ImportError as ex:
            # Chain the original error so the underlying import failure stays visible.
            raise MissingRequiredPackage(
                "Please install the 'openai' package to use the Azure AI Evaluation SDK"
            ) from ex
        except AttributeError:
            logging.warning(
                "The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name
            )
            continue

        if method is None:
            # The class or the method is not available in the imported module.
            continue

        yield cls, method, is_async
96
+
97
+
98
def inject_openai_api():
    """Patch the create methods of the OpenAI API classes so that token usage data
    can be collected.

    Each method that is not already patched is replaced on its class by a wrapper, and
    the replaced method is stored on the wrapper under ``KEY_ATTR_ORIGINAL``.
    """
    for api_class, current_method, is_async in _openai_api_list():
        if hasattr(current_method, KEY_ATTR_ORIGINAL):
            # The create method of this class has already been modified.
            continue

        patched: Callable = _wrap_openai_api_method(current_method, is_async)
        setattr(patched, KEY_ATTR_ORIGINAL, current_method)
        setattr(api_class, current_method.__name__, patched)
108
+
109
+
110
def recover_openai_api():
    """Undo the patching of the OpenAI API classes.

    Every method that carries a ``KEY_ATTR_ORIGINAL`` attribute is replaced on its class
    by the method stored in that attribute; methods without it are left untouched.
    """
    for api_class, current_method, _ in _openai_api_list():
        try:
            unpatched = getattr(current_method, KEY_ATTR_ORIGINAL)
        except AttributeError:
            # Not a wrapper installed by the injection, nothing to restore.
            continue
        setattr(api_class, current_method.__name__, unpatched)
118
+
119
+
120
class CaptureOpenAITokenUsage:
    """Context manager to capture OpenAI token usage.

    On entry a fresh ``TokenMetrics`` is stored in the ``_token_metrics`` context
    variable and this instance's own ``TokenMetrics`` is returned. On exit, the metrics
    currently held by the context variable are merged into the returned object through
    its ``update`` method, so the returned object only reflects usage after the block ends.
    """

    def __init__(self):
        self._tokens = TokenMetrics(0, 0, 0)

    def __enter__(self) -> TokenMetrics:
        fresh_counter = TokenMetrics(0, 0, 0)
        _token_metrics.set(fresh_counter)
        return self._tokens

    def __exit__(self, exc_type: Optional[Exception], exc_value: Optional[Exception], traceback: Optional[Any]) -> None:
        self._tokens.update(_token_metrics.get())