azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,663 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # cspell:ignore apng, retriable
6
+
7
+ import copy
8
+ import os
9
+ import re
10
+ import json
11
+ import base64
12
+ from dataclasses import dataclass, is_dataclass, fields
13
+ from logging import Logger
14
+ from pathlib import Path
15
+ from typing import (
16
+ Any,
17
+ AsyncGenerator,
18
+ Dict,
19
+ Final,
20
+ List,
21
+ Mapping,
22
+ MutableMapping,
23
+ Optional,
24
+ Sequence,
25
+ Set,
26
+ Tuple,
27
+ Type,
28
+ TypeVar,
29
+ Union,
30
+ cast,
31
+ )
32
+
33
+ from jinja2 import Template
34
+ from openai import AsyncStream
35
+ from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionUserMessageParam
36
+ from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAIError
37
+
38
+ from azure.ai.evaluation._constants import DefaultOpenEncoding
39
+ from azure.ai.evaluation._legacy.prompty._exceptions import (
40
+ InvalidInputError,
41
+ JinjaTemplateError,
42
+ PromptyException,
43
+ )
44
+
45
+ from azure.ai.evaluation._legacy.prompty._yaml_utils import load_yaml
46
+
47
+
48
+ # region: Resolving references
49
+
50
+
51
@dataclass
class PromptyModelConfiguration:
    """
    A dataclass that represents a model config of prompty.

    :param configuration: Prompty model connection configuration.
    :type configuration: dict
    :param parameters: Params of the LLM request.
    :type parameters: Dict[str, Any]
    :param response: Return the complete response or the first choice, default value is first.
    :type response: str
    :param model: Name of the model to use. When it is not supplied (or is empty), it is
        filled in from the ``azure_deployment`` entry of ``configuration`` and, failing
        that, from its ``model`` entry. It stays ``None`` when neither entry is set.
    :type model: Optional[str]
    :raises PromptyException: If ``configuration`` is not a dictionary.
    """

    configuration: dict
    parameters: Dict[str, Any]
    response: str = "first"
    model: Optional[str] = None
    # NOTE: there is deliberately no ``_overflow`` field here, so unknown keys are not
    # stored on the instance.

    def __post_init__(self):
        # Dataclasses do not enforce annotations, so validate the one field we index into.
        if not isinstance(self.configuration, dict):
            raise PromptyException("The configuration of the model must be a dictionary.")

        # An explicitly supplied model wins; otherwise prefer the Azure deployment name
        # and fall back to the plain model name.
        if not self.model:
            self.model = self.configuration.get("azure_deployment") or self.configuration.get("model")
78
+
79
+
80
T = TypeVar("T")


def dataclass_from_dict(cls: Type[T], data: Dict[str, Any]) -> T:
    """Helper function to make creating dataclass instances from dictionaries easier.
    Unlike using cls(**data), this function will ignore any keys in the dictionary that
    are not constructor fields of the dataclass. If the dataclass optionally contains an
    _overflow field, any extra key/value pairs will be placed in that field.

    Fields declared with ``init=False`` are not constructor parameters, so keys that
    match them are treated as extra keys instead of being passed to the constructor.

    This does no type checking and inspects only the key names.

    :param Type[T] cls: The dataclass type to create.
    :param Dict[str, Any] data: The dictionary to create the dataclass instance from.
    :return: The dataclass instance.
    :rtype: T
    :raises ValueError: If ``cls`` is not a dataclass type.
    """
    # is_dataclass() is also true for dataclass *instances*, which cannot be called.
    if not (isinstance(cls, type) and is_dataclass(cls)):
        raise ValueError("This function only works with @dataclass Types")

    # Only fields that the generated __init__ accepts may be passed as keyword arguments.
    init_fields: Set[str] = {f.name for f in fields(cls) if f.init}

    params: Dict[str, Any] = {}
    overflow: Dict[str, Any] = {}

    for key, value in data.items():
        if key in init_fields:
            params[key] = value
        else:
            overflow[key] = value

    # The collected extras always win over a literal "_overflow" key in the input.
    if "_overflow" in init_fields:
        params["_overflow"] = overflow

    return cast(T, cls(**params))
114
+
115
+
116
def resolve_references(origin: Mapping[str, Any], base_path: Optional[Path] = None) -> Dict[str, Any]:
    """Resolve every reference found in a mapping.

    String values are handed to ``_resolve_reference``; lists and dicts are rebuilt with
    their members resolved in the same way. Any other value is returned unchanged.

    :param Mapping[str, Any] origin: The object to resolve.
    :param Path|None base_path: The base path to resolve the file reference.
    :return: The resolved object.
    :rtype: Dict[str, Any]
    """

    def _walk(node: Any) -> Any:
        # Strings are the only leaves that can carry a reference.
        if isinstance(node, str):
            return _resolve_reference(node, base_path=base_path)
        if isinstance(node, list):
            return [_walk(member) for member in node]
        if isinstance(node, dict):
            return {name: _walk(child) for name, child in node.items()}
        return node

    resolved: Dict[str, Any] = {}
    for name, child in origin.items():
        resolved[name] = _walk(child)
    return resolved
135
+
136
+
137
def _resolve_reference(reference: str, base_path: Optional[Path] = None) -> Union[str, dict]:
    """
    Resolve the reference, two types are supported, env, file.
    When the string format is ${env:ENV_NAME}, the environment variable value will be returned
    (or the original string when the variable is not set).
    When the string format is ${file:file_path}, the file content is returned: parsed for
    ``.json``, ``.yml`` and ``.yaml`` files, and as raw text for any other extension.
    Strings that do not start with a reference, or that use an unknown reference type, are
    returned unchanged.

    :param str reference: The reference string.
    :param Path|None base_path: The base path to resolve the file reference.
    :return: The resolved reference.
    :rtype: str | dict
    :raises PromptyException: If a file reference does not point to an existing file.
    """
    pattern = r"\$\{(\w+):(.*)\}"
    match = re.match(pattern, reference)
    if not match:
        return reference

    reference_type, value = match.groups()
    if reference_type == "env":
        return os.environ.get(value, reference)

    if reference_type == "file":
        # Relative paths are resolved against base_path when one is supplied.
        if not Path(value).is_absolute() and base_path:
            path = Path(base_path) / value
        else:
            path = Path(value)

        # A directory "exists" but cannot be opened for reading; report it the same way
        # as a missing file instead of leaking a raw OSError from open().
        if not path.exists() or path.is_dir():
            raise PromptyException(f"Cannot find the reference file {value}.")

        suffix = path.suffix.lower()
        with open(path, "r", encoding=DefaultOpenEncoding.READ) as f:
            if suffix == ".json":
                return json.load(f)
            if suffix in (".yml", ".yaml"):
                return load_yaml(f)
            return f.read()

    # Unknown reference type: leave the value untouched.
    return reference
176
+
177
+
178
def update_dict_recursively(origin_dict: Mapping[str, Any], overwrite_dict: Mapping[str, Any]) -> Dict[str, Any]:
    """Return a new dictionary with ``overwrite_dict`` merged on top of ``origin_dict``.

    Dictionary values in ``overwrite_dict`` are merged recursively with the matching
    value of ``origin_dict``; every other value replaces the original outright. Keys that
    only exist in ``origin_dict`` are carried over. Neither input is modified.

    :param Mapping[str, Any] origin_dict: The base values.
    :param Mapping[str, Any] overwrite_dict: The values that take precedence.
    :return: The merged dictionary.
    :rtype: Dict[str, Any]
    """
    updated_dict: Dict[str, Any] = {}
    for key, new_value in overwrite_dict.items():
        if isinstance(new_value, dict):
            current = origin_dict.get(key)
            # Only a mapping can be merged into. A scalar or None under the same key is
            # simply replaced, rather than crashing on ``.items()`` in the recursion.
            if not isinstance(current, Mapping):
                current = {}
            updated_dict[key] = update_dict_recursively(current, new_value)
        else:
            updated_dict[key] = new_value

    # Carry over everything the overwrite did not mention.
    for key, value in origin_dict.items():
        if key not in updated_dict:
            updated_dict[key] = value
    return updated_dict
189
+
190
+
191
+ # endregion
192
+
193
+
194
+ # region: Jinja template rendering
195
+
196
VALID_ROLES = ["system", "user", "assistant", "function"]
"""Valid roles for the OpenAI Chat API"""

PROMPTY_ROLE_SEPARATOR_PATTERN = re.compile(
    rf"(?i)^\s*#?\s*({'|'.join(VALID_ROLES)})\s*:\s*\n",
    flags=re.MULTILINE,
)
"""Pattern to match the role separator in a prompty template: a line holding one of the
valid roles (case-insensitive, optionally prefixed with '#') followed by a colon"""

MARKDOWN_IMAGE_PATTERN = re.compile(
    r"(?P<match>!\[[^\]]*\]\(.*?(?=\"|\))\))",
    flags=re.MULTILINE,
)
"""Pattern to match markdown syntax for embedding images such as ![alt text](url).
Because the whole expression is a capture group, re.split() keeps the matched image
markup in the list of split parts instead of discarding it"""

IMAGE_URL_PARSING_PATTERN = re.compile(
    r"^!\[(?P<alt_text>[^\]]+)\]\((?P<link>(?P<scheme>[^:]+(?=:))?:?(?P<mime_type>[^;]+(?=;))?;?(?P<data>[^\)]*))\)$"
)
"""Pattern used to parse the image URL from the markdown syntax. This captures the following groups:
- alt_text: The alt text for the image
- link: The full link
- scheme: The scheme used in the link (e.g. data, http, https)
- mime_type: The mime type of the image (only for data URLs)
- data: The data part of the URL (only for data URLs)
"""

DEFAULT_IMAGE_MIME_TYPE: Final[str] = "image/*"
"""The mime type to use when we don't know the image type"""

FILE_EXT_TO_MIME: Final[Mapping[str, str]] = dict(
    [
        (".apng", "image/apng"),
        (".avif", "image/avif"),
        (".bmp", "image/bmp"),
        (".gif", "image/gif"),
        (".heic", "image/heic"),
        (".heif", "image/heif"),
        (".ico", "image/vnd.microsoft.icon"),
        (".jpg", "image/jpeg"),
        (".jpeg", "image/jpeg"),
        (".png", "image/png"),
        (".svg", "image/svg+xml"),
        (".tif", "image/tiff"),
        (".tiff", "image/tiff"),
        (".webp", "image/webp"),
    ]
)
"""Mapping of file extensions to mime types"""
240
+
241
+
242
def render_jinja_template(template_str: str, *, trim_blocks=True, keep_trailing_newline=True, **kwargs) -> str:
    """Render a Jinja template string with the supplied variables.

    :param str template_str: The Jinja template source.
    :keyword trim_blocks: Forwarded to the Jinja ``Template`` constructor.
    :keyword keep_trailing_newline: Forwarded to the Jinja ``Template`` constructor.
    :keyword kwargs: The variables made available to the template.
    :return: The rendered text.
    :rtype: str
    :raises PromptyException: If building or rendering the template fails for any reason.
    """
    try:
        return Template(
            template_str,
            trim_blocks=trim_blocks,
            keep_trailing_newline=keep_trailing_newline,
        ).render(**kwargs)
    except Exception as e:  # pylint: disable=broad-except
        # Any failure is surfaced as one exception type, with the original kept as the cause.
        raise PromptyException(f"Failed to render jinja template - {type(e).__name__}: {str(e)}") from e
248
+
249
+
250
def build_messages(
    *, prompt: str, working_dir: Path, image_detail: str = "auto", **kwargs: Any
) -> Sequence[Mapping[str, Any]]:
    """Render a prompt template and split the result into chat messages.

    :keyword str prompt: The Jinja template of the prompt.
    :keyword Path working_dir: Passed through to the chat parser.
    :keyword str image_detail: Passed through to the chat parser.
    :keyword kwargs: The variables used to render the template.
    :return: The parsed chat messages.
    :rtype: Sequence[Mapping[str, Any]]
    """
    # keep_trailing_newline=True keeps the last \n in the prompt, which avoids turning
    # "user:\t\n" into "user:".
    rendered = render_jinja_template(prompt, trim_blocks=True, keep_trailing_newline=True, **kwargs)
    return _parse_chat(rendered, working_dir, image_detail)
257
+
258
+
259
def _parse_chat(chat_str: str, working_dir: Path, image_detail: str) -> Sequence[Mapping[str, Any]]:
    """Split rendered prompt text into a list of chat messages.

    The text is split on the role separator lines. A chunk that follows a role becomes
    that message's content (with an optional name); any other non-blank chunk must itself
    be a valid role and starts a new message.

    :param str chat_str: The rendered prompt text.
    :param Path working_dir: Passed through when building message content.
    :param str image_detail: Passed through when building message content.
    :return: The parsed messages.
    :rtype: Sequence[Mapping[str, Any]]
    :raises JinjaTemplateError: If a chunk is not a valid role, or a "function" message
        has no name.
    """
    # openai chat api only supports VALID_ROLES as role names.
    # customer can add single # in front of role name for markdown highlight.
    # and we still support role name without # prefix for backward compatibility.
    messages: List[Dict[str, Any]] = []

    # =======================================================================================================
    # NOTE: The Promptflow code supported tool calls but used eval() to parse them. This is an unacceptable
    #       security risk. Since none of the current evaluators use tool calls, this functionality has been
    #       removed.
    # =======================================================================================================

    for piece in PROMPTY_ROLE_SEPARATOR_PATTERN.split(chat_str):
        current = messages[-1] if messages else None

        # A message that has a role but neither content nor tool calls is still waiting
        # for its body.
        awaiting_body = (
            bool(current) and "role" in current and "content" not in current and "tool_calls" not in current
        )

        if not awaiting_body:
            role_text = piece.strip()
            if role_text == "":
                continue
            # Check if prompt follows chat api message format and has valid role.
            # References: https://platform.openai.com/docs/api-reference/chat/create.
            role = role_text.lower()
            _validate_role(role)
            messages.append({"role": role})
            continue

        name_and_content = _try_parse_name_and_content(piece)
        if name_and_content is not None:
            current["name"] = name_and_content[0]
            current["content"] = _to_content_str_or_list(name_and_content[1], working_dir, image_detail)
            continue

        if current["role"] == "function":
            # "name" is required if the role is "function"
            raise JinjaTemplateError(
                "Failed to parse function role prompt. Please make sure the prompt follows the "
                "format: 'name:\\nfunction_name\\ncontent:\\nfunction_content'. "
                "'name' is required if role is function, and it should be the name of the function "
                "whose response is in the content. May contain a-z, A-Z, 0-9, and underscores, "
                "with a maximum length of 64 characters. See more details in "
                "https://platform.openai.com/docs/api-reference/chat/create#chat/create-name"
            )

        # "name" is optional for other role types.
        current["content"] = _to_content_str_or_list(piece, working_dir, image_detail)

    return messages
323
+
324
+
325
def _validate_role(role: str):
    """Check that a parsed role name is one of the roles listed in ``VALID_ROLES``.

    :param str role: The role name extracted from the prompt.
    :raises JinjaTemplateError: If ``role`` is not contained in ``VALID_ROLES``.
    """
    # Guard clause: nothing to do for a recognised role.
    if role in VALID_ROLES:
        return

    allowed_roles = ", ".join(VALID_ROLES)
    raise JinjaTemplateError(
        message=(
            f"The Chat API requires a specific format for prompt definition, and the prompt should include separate "
            f"lines as role delimiters: {allowed_roles}.\n"
            f"Current parsed role '{role}' does not meet the requirement. If you intend to use the Completion API, "
            f"please select the appropriate API type and deployment name."
        )
    )
335
+
336
+
337
def _to_content_str_or_list(chat_str: str, working_dir: Path, image_detail: str) -> Union[str, List[Dict[str, Any]]]:
    """Turn the text of a single chat message into either a plain string or a list of content parts.

    The text is split with ``MARKDOWN_IMAGE_PATTERN``; blank pieces are discarded. When at most one piece
    remains, the stripped input text is returned as is. Otherwise each piece becomes either an image part
    (via ``_inline_image``) or a ``{"type": "text", ...}`` part.

    :param str chat_str: The raw message text.
    :param Path working_dir: Forwarded to ``_inline_image`` for resolving relative image paths.
    :param str image_detail: Forwarded to ``_inline_image`` as the image detail setting.
    :return: The stripped text, or a list of content parts.
    :rtype: Union[str, List[Dict[str, Any]]]
    """
    pieces: List[str] = []
    for raw_piece in re.split(MARKDOWN_IMAGE_PATTERN, chat_str):
        piece = raw_piece.strip()
        if piece:
            pieces.append(piece)

    # NOTE(review): a message consisting of a single markdown image also takes this branch and is returned
    # as plain text rather than as an image part - confirm this is intended.
    if len(pieces) < 2:
        return chat_str.strip()

    return [
        (
            _inline_image(piece, working_dir, image_detail)
            if piece.startswith("![") and piece.endswith(")")
            else {"type": "text", "text": piece}
        )
        for piece in pieces
    ]
349
+
350
+
351
def _inline_image(image: str, working_dir: Path, image_detail: str) -> Dict[str, Any]:
    """Convert an image reference in markdown format into an ``image_url`` message part for the AI service.

    Local image files are read and embedded as a base 64 data URI. http(s) and ftp(s) URLs, as well as data
    URIs whose payload starts with ``base64``, are passed through untouched. A data URI whose payload starts
    with ``path:`` is treated as a reference to a local file.

    :param str image: The image URL in markdown format (e.g. ![alternative text](https://www.bing.com/favicon.ico))
    :param Path working_dir: The working directory to use when resolving relative file paths
    :param str image_detail: The image detail to use when sending the image to the AI service
    :return: The image message to send to the AI service
    :rtype: Dict[str, Any]
    :raises InvalidInputError: If the markdown cannot be parsed, a data URI has an unsupported payload, a
        local file does not exist, or no URI could be determined.
    """

    def encode_local_file(local_file: str, mime_type: Optional[str]) -> str:
        # Relative paths are resolved against the working directory.
        candidate = Path(local_file)
        file_path = candidate if candidate.is_absolute() else working_dir / local_file
        if not file_path.exists():
            # TODO ralphe logging?
            raise InvalidInputError(f"Cannot find the image path '{file_path.as_posix()}'")

        payload = base64.b64encode(file_path.read_bytes()).decode("utf-8")
        # Without an explicit MIME type, derive one from the file extension.
        effective_mime = mime_type or FILE_EXT_TO_MIME.get(file_path.suffix.lower(), DEFAULT_IMAGE_MIME_TYPE)
        return f"data:{effective_mime};base64,{payload}"

    parsed = re.match(IMAGE_URL_PARSING_PATTERN, image)
    if parsed is None:
        raise InvalidInputError(f"Invalid image URL '{image}'")

    def group_text(group_name: str) -> str:
        # Named group as stripped text; an unmatched group yields "".
        return (parsed.group(group_name) or "").strip()

    scheme = group_text("scheme").lower()
    inlined_uri: str

    if scheme == "data":
        declared_mime = group_text("mime_type")
        data = group_text("data")

        # data urls may contain local paths too
        if data[:5].lower() == "path:":
            inlined_uri = encode_local_file(data[5:].strip(), declared_mime)
        elif data[:6].lower() == "base64":
            # already inline, pass through full URI as is
            inlined_uri = group_text("link")
        else:
            raise InvalidInputError(f"Invalid image data URL '{image}'")
    elif scheme in ("http", "https", "ftp", "ftps"):
        # remote resource, pass through full URI as is
        inlined_uri = group_text("link")
    else:
        # anything else is assumed to be a file path
        inlined_uri = encode_local_file(group_text("link"), None)

    if not inlined_uri:
        raise InvalidInputError(f"Failed to determine how to inline the following image URL '{image}'")

    image_url = {"url": inlined_uri, "detail": image_detail}
    return {"type": "image_url", "image_url": image_url}
415
+
416
+
417
+ def _try_parse_name_and_content(role_prompt: str) -> Optional[Tuple[str, str]]:
418
+ # customer can add ## in front of name/content for markdown highlight.
419
+ # and we still support name/content without ## prefix for backward compatibility.
420
+ # TODO ralphe: This maybe has something to do with parsing functions or tool calls but I'm not sure
421
+ pattern = r"\n*#{0,2}\s*name\s*:\s*\n+\s*(\S+)\s*\n*#{0,2}\s*content\s*:\s*\n?(.*)"
422
+ match = re.search(pattern, role_prompt, re.DOTALL)
423
+ if match:
424
+ return match.group(1), match.group(2)
425
+ return None
426
+
427
+
428
+ # endregion
429
+
430
+
431
+ # region OpenAI connections and requests
432
+
433
# Type of the ``response`` argument accepted by ``format_llm_response``: either a complete ``ChatCompletion``
# or an ``AsyncStream`` of ``ChatCompletionChunk`` items.
OpenAIChatResponseType = Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]
434
+
435
+
436
def prepare_open_ai_request_params(
    model_config: PromptyModelConfiguration, template: Union[str, Sequence[Mapping[str, Any]]]
) -> MutableMapping[str, Any]:
    """Build the keyword arguments for an OpenAI chat request.

    :param PromptyModelConfiguration model_config: Supplies the base ``parameters`` and the ``model`` name.
    :param template: The value to send as ``messages``.
    :type template: Union[str, Sequence[Mapping[str, Any]]]
    :return: A deep copy of ``model_config.parameters`` with ``model`` and ``messages`` set.
    :rtype: MutableMapping[str, Any]
    """
    # Deep copy so that the parameters held by the configuration are not modified.
    request_params = copy.deepcopy(model_config.parameters)

    for key, value in (("model", model_config.model), ("messages", template)):
        request_params[key] = value

    # NOTE:
    # - Tool calls have been disabled due to a security issue in the implementation. See comment earlier in
    #   this file for more details
    # - Validation of "tools"/"tool_choice" and of the deprecated "functions"/"function_call" parameters is
    #   deliberately left to the service. This removes a maintenance burden from the SDK should the service
    #   definition for function calls change.

    return request_params
462
+
463
+
464
async def format_llm_response(
    response: OpenAIChatResponseType,
    is_first_choice: bool,
    response_format: Optional[Mapping[str, Any]] = None,
    outputs: Optional[Mapping[str, Any]] = None,
    inputs: Optional[Mapping[str, Any]] = None,
) -> dict:
    """
    Format LLM response

    The result is always a dictionary with the keys ``llm_output``, ``input_token_count``,
    ``output_token_count``, ``total_token_count``, ``finish_reason``, ``model_id``, ``sample_input`` and
    ``sample_output``. Everything except ``llm_output`` keeps its default (0 or "") unless the response is a
    non-streaming completion and is_first_choice is true.

    If is_first_choice is false, ``llm_output`` is the LLM response itself.
    If is_first_choice is true, ``llm_output`` behaves as below:
        response_format: type: text
          - non-streaming response
            The first choice content. Type is string.
          - streaming response
            Async generator over the first choice content. Type is AsyncGenerator[str, None]
        response_format: type: json_object
          - non-streaming response
            The json dict of the first choice. Type is dict
          - streaming response
            The json dict of the first choice. Type is dict
          - outputs
            Only the keys named in outputs are extracted from the json dict. Type is dict.

    :param response: LLM response.
    :type response: Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]
    :param is_first_choice: If true, it will return the first item in response choices, else it will return all response
    :type is_first_choice: bool
    :param response_format: An object specifying the format that the model must output.
    :type response_format: Optional[Mapping[str, Any]]
    :param outputs: Extract corresponding output in json format response
    :type outputs: Optional[Mapping[str, Any]]
    :param inputs: The inputs of the call. When non-empty they are JSON-serialised into a single user message
        that is reported as ``sample_input``; otherwise ``sample_input`` is an empty string.
    :type inputs: Optional[Mapping[str, Any]]
    :return: Formatted LLM response.
    :rtype: dict
    :raises InvalidInputError: If a key named in outputs is missing from the JSON response.
    """

    def format_choice(item: str) -> Union[str, Mapping[str, Any]]:
        # response_format is one of text or json_object.
        # https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format
        if not is_json_format:
            return item

        result_dict = json.loads(item)
        if not outputs:
            return result_dict

        # return the keys in outputs
        output_results = {}
        for key in outputs:
            if key not in result_dict:
                raise InvalidInputError(f"Cannot find '{key}' in response {list(result_dict.keys())}")
            output_results[key] = result_dict[key]
        return output_results

    async def format_stream(llm_response: AsyncStream[ChatCompletionChunk]) -> AsyncGenerator[str, None]:
        # Yield content of the first choice index seen only; stop as soon as another index shows up.
        cur_index = None
        async for chunk in llm_response:
            if len(chunk.choices) > 0 and chunk.choices[0].delta.content:
                if cur_index is None:
                    cur_index = chunk.choices[0].index
                if cur_index != chunk.choices[0].index:
                    return
                yield chunk.choices[0].delta.content

    to_ret = {
        "llm_output": None,
        "input_token_count": 0,
        "output_token_count": 0,
        "total_token_count": 0,
        "finish_reason": "",
        "model_id": "",
        "sample_input": "",
        "sample_output": "",
    }

    if not is_first_choice:
        to_ret["llm_output"] = response
        return to_ret  # we don't actually use this code path since streaming is not used, so set token counts to 0

    is_json_format = isinstance(response_format, dict) and response_format.get("type") == "json_object"
    if isinstance(response, AsyncStream):
        if not is_json_format:
            to_ret["llm_output"] = format_stream(llm_response=response)
            return to_ret
        content = "".join([item async for item in format_stream(llm_response=response)])
        to_ret["llm_output"] = format_choice(content)
        return to_ret  # we don't actually use this code path since streaming is not used, so set token counts to 0

    usage = response.usage
    input_token_count = usage.prompt_tokens if usage and usage.prompt_tokens else 0
    output_token_count = usage.completion_tokens if usage and usage.completion_tokens else 0
    total_token_count = usage.total_tokens if usage and usage.total_tokens else 0
    finish_reason = (
        response.choices[0].finish_reason if response.choices and response.choices[0].finish_reason else ""
    )
    model_id = response.model if response.model else ""
    sample_output_list = (
        [{"role": response.choices[0].message.role, "content": response.choices[0].message.content}]
        if (response.choices and response.choices[0].message.content and response.choices[0].message.role)
        else []
    )
    sample_output = json.dumps(sample_output_list)

    # Default to "" so that calls without inputs do not fail on an unbound variable below.
    sample_input = ""
    if inputs:
        user_message = ChatCompletionUserMessageParam(
            role="user",
            content=json.dumps(inputs),
        )
        sample_input = json.dumps([user_message])

    # When calling function/tool, function_call/tool_call response will be returned as a field in message,
    # so we need return message directly. Otherwise, we only return content.
    # https://platform.openai.com/docs/api-reference/chat/object#chat/object-choices
    # "function_call" is the value the API documents for the deprecated function-calling flow;
    # "function_calls" is kept only for backward compatibility with the previous check.
    if response.choices[0].finish_reason in ["tool_calls", "function_call", "function_calls"]:
        response_content = response.model_dump()["choices"][0]["message"]
    else:
        response_content = getattr(response.choices[0].message, "content", "")

    to_ret["llm_output"] = format_choice(response_content)
    to_ret["input_token_count"] = input_token_count
    to_ret["output_token_count"] = output_token_count
    to_ret["total_token_count"] = total_token_count
    to_ret["finish_reason"] = finish_reason
    to_ret["model_id"] = model_id
    to_ret["sample_input"] = sample_input
    to_ret["sample_output"] = sample_output
    return to_ret
595
+
596
+
597
def openai_error_retryable(
    error: OpenAIError, retry: int, entity_retry: List[int], max_entity_retries: int
) -> Tuple[bool, float]:
    """
    Determines if an OpenAI error is retryable, and the minimum retry delay to use.

    The delay is taken from the ``Retry-After`` response header when the error carries a response and the
    header holds a positive number of seconds. Otherwise an exponential backoff of ``min(60, 2 + 2**retry)``
    seconds is used.

    :param OpenAIError error: The error to handle
    :param int retry: The current retry count (0 means we're on the first attempt and no retries have been made)
    :param List[int] entity_retry: The current retry count for the unprocessable entity failures. This should be a
        list containing only 1 element to mimic pass by reference semantics. A value of 0 means we're on the
        first attempt and no retries have been made. The element is incremented in place on every HTTP 422.
    :param int max_entity_retries: The maximum number of retries to make for unprocessable entity failures
    :return: A tuple containing whether the error is retryable and the min delay (in seconds) to use
    :rtype: Tuple[bool, float]
    """

    # Using https://platform.openai.com/docs/guides/error-codes/api-errors#python-library-error-types as a reference

    should_retry: bool
    delay: Optional[float] = None

    if isinstance(error, APIConnectionError):
        retriable_error_messages: Sequence[str] = [
            "connection aborted",
            # issue 2296
            "server disconnected without sending a response",
        ]
        should_retry = (
            isinstance(error, APITimeoutError)  # APITimeoutError is a subclass of APIConnectionError
            or str(error).lower() in retriable_error_messages
            or str(error.__cause__).lower() in retriable_error_messages
        )
    elif isinstance(error, APIStatusError):
        status_code: int = error.response.status_code
        if status_code == 422:
            # As per the original legacy code, UnprocessableEntityError (HTTP 422) should be handled differently
            # with a smaller retry count, as retrying more may not be beneficial.
            should_retry = entity_retry[0] < max_entity_retries
            entity_retry[0] += 1
        elif status_code == 429:
            # Two types, one is you are throttled and so should retry after a delay, the other is you have exceeded
            # your quota and should not retry. The comparison is case-insensitive and tolerates a missing type.
            should_retry = (error.type or "").lower() != "insufficient_quota"
        else:
            should_retry = status_code >= 500

        # Use what the service tells us to use for the delay if it's provided
        if should_retry:
            retry_after = error.response.headers.get("Retry-After", None)
            if retry_after is not None:
                try:
                    requested_delay = float(retry_after)
                except ValueError:
                    # Retry-After may also be an HTTP-date; fall back to exponential backoff in that case.
                    requested_delay = 0.0
                if requested_delay > 0:
                    delay = requested_delay
    else:
        should_retry = False

    # Use exponential backoff for retries if the service doesn't provide a usable delay
    if not delay:
        delay = min(60, 2 + 2**retry)

    return (should_retry, delay)
661
+
662
+
663
+ # endregion