azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,430 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import asyncio
6
+ import re
7
+
8
+ from logging import Logger
9
+ from os import PathLike
10
+ from pathlib import Path
11
+ from typing import Any, AsyncGenerator, Awaitable, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union, cast
12
+
13
+ from openai import AsyncAzureOpenAI, AsyncOpenAI, NotGiven, OpenAIError
14
+ from openai.lib.azure import AsyncAzureADTokenProvider
15
+ from azure.core.credentials import TokenCredential
16
+ from azure.core.credentials_async import AsyncTokenCredential
17
+
18
+ from azure.ai.evaluation._exceptions import ErrorTarget
19
+ from azure.ai.evaluation._constants import DefaultOpenEncoding, TokenScope
20
+ from azure.ai.evaluation._legacy.prompty._exceptions import (
21
+ InvalidInputError,
22
+ PromptyException,
23
+ MissingRequiredInputError,
24
+ NotSupportedError,
25
+ WrappedOpenAIError,
26
+ )
27
+ from azure.ai.evaluation._legacy.prompty._connection import AzureOpenAIConnection, Connection, OpenAIConnection
28
+ from azure.ai.evaluation._legacy.prompty._yaml_utils import load_yaml_string
29
+ from azure.ai.evaluation._legacy.prompty._utils import (
30
+ dataclass_from_dict,
31
+ PromptyModelConfiguration,
32
+ OpenAIChatResponseType,
33
+ build_messages,
34
+ format_llm_response,
35
+ openai_error_retryable,
36
+ prepare_open_ai_request_params,
37
+ resolve_references,
38
+ update_dict_recursively,
39
+ )
40
+ from azure.ai.evaluation._constants import DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
41
+ from azure.ai.evaluation._legacy._common._logging import get_logger
42
+ from azure.ai.evaluation._legacy._common._async_token_provider import AsyncAzureTokenProvider
43
+ from azure.ai.evaluation._user_agent import UserAgentSingleton
44
+
45
+ PROMPTY_EXTENSION: Final[str] = ".prompty"
46
+
47
+
48
+ class AsyncPrompty:
49
+ """A prompty is a prompt with predefined metadata like inputs, and can be executed directly like a flow.
50
+ A prompty is represented as a templated markdown file with a modified front matter.
51
+ The front matter is a yaml file that contains meta fields like model configuration, inputs, etc..
52
+
53
+ Prompty example:
54
+ .. code-block::
55
+
56
+ ---
57
+ name: Hello Prompty
58
+ description: A basic prompt
59
+ model:
60
+ api: chat
61
+ configuration:
62
+ type: azure_openai
63
+ azure_deployment: gpt-35-turbo
64
+ api_key: ${env:AZURE_OPENAI_API_KEY}
65
+ api_version: ${env:AZURE_OPENAI_API_VERSION}
66
+ azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
67
+ parameters:
68
+ max_tokens: 128
69
+ temperature: 0.2
70
+ inputs:
71
+ text:
72
+ type: string
73
+ ---
74
+ system:
75
+ Write a simple {{text}} program that displays the greeting message.
76
+
77
+ Prompty as function example:
78
+
79
+ .. code-block:: python
80
+
81
+ from azure.ai.evaluation._legacy.prompty import AsyncPrompty
82
+ prompty = AsyncPrompty(path="path/to/prompty.prompty")
83
+ result = await prompty(input_a=1, input_b=2)
84
+
85
+ # Override model config with dict
86
+ model_config = {
87
+ "api": "chat",
88
+ "configuration": {
89
+ "type": "azure_openai",
90
+ "azure_deployment": "gpt-35-turbo",
91
+ "api_key": "${env:AZURE_OPENAI_API_KEY}",
92
+ "api_version": "${env:AZURE_OPENAI_API_VERSION}",
93
+ "azure_endpoint": "${env:AZURE_OPENAI_ENDPOINT}",
94
+ },
95
+ "parameters": {
96
+ "max_tokens": 512
97
+ }
98
+ }
99
+ prompty = AsyncPrompty.load(source="path/to/prompty.prompty", model=model_config)
100
+ result = await prompty(input_a=1, input_b=2)
101
+
102
+ # Override model config with configuration
103
+ from azure.ai.evaluation import AzureOpenAIModelConfiguration
104
+ model_config = {
105
+ "api": "chat",
106
+ "configuration": AzureOpenAIModelConfiguration(
107
+ azure_deployment="gpt-35-turbo",
108
+ api_key="${env:AZURE_OPENAI_API_KEY}",
109
+ api_version="${env:AZURE_OPENAI_API_VERSION}",
110
+ azure_endpoint="${env:AZURE_OPENAI_ENDPOINT}",
111
+ ),
112
+ "parameters": {
113
+ "max_tokens": 512
114
+ }
115
+ }
116
+ prompty = AsyncPrompty(path="path/to/prompty.prompty", model=model_config)
117
+ result = await prompty(input_a=1, input_b=2)
118
+
119
+ # Override model config with created connection
120
+ from azure.ai.evaluation import AzureOpenAIModelConfiguration
121
+ model_config = {
122
+ "api": "chat",
123
+ "configuration": AzureOpenAIModelConfiguration(
124
+ connection="azure_open_ai_connection",
125
+ azure_deployment="gpt-35-turbo",
126
+ ),
127
+ "parameters": {
128
+ "max_tokens": 512
129
+ }
130
+ }
131
+ prompty = AsyncPrompty(path="path/to/prompty.prompty", model=model_config)
132
+ result = await prompty(input_a=1, input_b=2)
133
+ """
134
+
135
def __init__(
    self,
    path: Union[str, PathLike],
    *,
    logger: Optional[Logger] = None,
    token_credential: Optional[Union[TokenCredential, AsyncTokenCredential]] = None,
    is_reasoning_model: bool = False,
    **kwargs: Any,
):
    """Parse a prompty file and prepare it for execution.

    :param path: Path to the prompty file to parse.
    :type path: Union[str, PathLike]
    :keyword logger: Logger used for request failure messages. A logger named after this module
        is used when omitted.
    :paramtype logger: Optional[Logger]
    :keyword token_credential: Credential used to build the Azure AD token provider when the
        Azure OpenAI connection has no API key. An ``AsyncAzureTokenProvider`` is created when omitted.
    :paramtype token_credential: Optional[Union[TokenCredential, AsyncTokenCredential]]
    :keyword is_reasoning_model: When True, ``max_tokens`` from the prompty file is replaced by
        ``max_completion_tokens`` and the sampling parameters ``temperature``, ``top_p``,
        ``presence_penalty`` and ``frequency_penalty`` are removed.
    :paramtype is_reasoning_model: bool
    :keyword kwargs: Overrides merged recursively on top of the prompty front matter.
    :paramtype kwargs: Any
    :raises InvalidInputError: If the merged model configuration selects the ``completion`` API.
    """
    path = Path(path)
    configs, self._template = self._parse_prompty(path)

    if is_reasoning_model:
        # ``model:`` or ``parameters:`` written as a bare YAML key loads as None, so the ``{}``
        # default of ``dict.get`` would never be used; normalise with ``or`` instead.
        parameters = (configs.get("model") or {}).get("parameters") or {}
        if "max_tokens" in parameters:
            parameters.pop("max_tokens", None)
            parameters["max_completion_tokens"] = DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
        # Remove unsupported parameters for reasoning models
        for key in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
            parameters.pop(key, None)

    # NOTE(review): the overrides in ``kwargs`` are merged after the reasoning-model clean-up
    # above, so an override can put back a parameter that was just removed -- confirm intended.
    configs = resolve_references(configs, base_path=path.parent)
    configs = update_dict_recursively(configs, resolve_references(kwargs, base_path=path.parent))

    if configs["model"].get("api") == "completion":
        raise InvalidInputError(
            "Prompty does not support the completion API. Please use the 'chat' completions API instead."
        )

    self._data = configs
    self._path = path
    self._model = dataclass_from_dict(PromptyModelConfiguration, configs["model"])
    # ``inputs:`` / ``outputs:`` present but empty load as None; fall back to an empty mapping so
    # that later ``.items()`` / lookups do not fail.
    self._inputs: Dict[str, Any] = configs.get("inputs") or {}
    self._outputs: Dict[str, Any] = configs.get("outputs") or {}
    self._name: str = configs.get("name", path.stem)
    self._logger = logger or get_logger(__name__)
    self._token_credential: Union[TokenCredential, AsyncTokenCredential] = (
        token_credential or AsyncAzureTokenProvider()
    )
174
+
175
@property
def path(self) -> Path:
    """Location of the prompty file this instance was created from.

    :return: The prompty file path.
    :rtype: Path
    """
    prompty_path: Path = self._path
    return prompty_path
183
+
184
@property
def name(self) -> str:
    """Display name of the prompty.

    :return: The prompty name.
    :rtype: str
    """
    prompty_name: str = self._name
    return prompty_name
192
+
193
@property
def description(self) -> Optional[str]:
    """Free-text description taken from the prompty configuration.

    :return: The value stored under the ``description`` key, or None when the key is absent.
    :rtype: Optional[str]
    """
    text: Optional[str] = self._data.get("description")
    return text
201
+
202
@classmethod
def load(
    cls,
    source: Union[str, PathLike],
    **kwargs,
) -> "AsyncPrompty":
    """Create an instance from a prompty file on the local file system.

    :param source: Path to a local prompty file; the path must exist and end with ``.prompty``.
    :type source: Union[PathLike, str]
    :return: The loaded prompty.
    :rtype: AsyncPrompty
    :raises PromptyException: If the path does not exist or does not have the ``.prompty`` extension.
    """
    prompty_file = Path(source)

    # Existence is validated before the extension so that a missing file is reported as such.
    if not prompty_file.exists():
        raise PromptyException(f"Source {prompty_file.absolute().as_posix()} does not exist")

    has_prompty_suffix = prompty_file.suffix == PROMPTY_EXTENSION
    if not has_prompty_suffix:
        raise PromptyException("Source must be a file with .prompty extension.")

    return cls(path=prompty_file, **kwargs)
225
+
226
@staticmethod
def _parse_prompty(path) -> Tuple[Dict[str, Any], str]:
    """Split a prompty file into its YAML front matter and its prompt template.

    The front matter is the text between the first ``---`` fence and the next line that consists
    of a ``---`` fence; everything after that closing fence is the template.

    :param path: Path of the prompty file to read.
    :return: The loaded front matter and the raw prompt template text.
    :rtype: Tuple[Dict[str, Any], str]
    :raises PromptyException: If the file does not contain two ``---`` fences.
    """
    with open(path, "r", encoding=DefaultOpenEncoding.READ) as f:
        prompty_content = f.read()

    # The front matter group is lazy and the closing fence is anchored to the start of a line
    # (re.MULTILINE). A greedy match would run to the LAST fence in the file, so a template that
    # contains a markdown horizontal rule ("---") would be swallowed into the YAML section, and
    # a closing fence of four or more dashes would leak a stray "-" into the YAML text.
    pattern = r"-{3,}\n(.*?)^[ \t]*-{3,}\n(.*)"
    result = re.search(pattern, prompty_content, re.DOTALL | re.MULTILINE)
    if not result:
        raise PromptyException(
            "Illegal formatting of prompty. The prompt file is in markdown format and can be divided into two "
            "parts, the first part is in YAML format and contains connection and model information. The second "
            "part is the prompt template."
        )
    config_content, prompt_template = result.groups()
    configs = load_yaml_string(config_content)
    return configs, prompt_template
241
+
242
def _resolve_inputs(self, input_values: Dict[str, Any]) -> Mapping[str, Any]:
    """Resolve the values for the inputs declared in the prompty file.

    For every declared input, the caller-supplied value is used when present; otherwise the
    ``default`` from the input's declaration is used. Keys in ``input_values`` that are not
    declared inputs are ignored.

    :param Dict[str, Any] input_values: The input values provided by the user.
    :return: The resolved inputs, keyed by declared input name.
    :rtype: Mapping[str, Any]
    :raises MissingRequiredInputError: If a declared input has neither a supplied value nor a default.
    """

    resolved_inputs: Dict[str, Any] = {}
    missing_inputs: List[str] = []
    # ``_inputs`` may be None when the front matter has an empty ``inputs:`` key.
    for input_name, spec in (self._inputs or {}).items():
        if input_name in input_values:
            resolved_inputs[input_name] = input_values[input_name]
            continue

        # An input declared as a bare YAML key (``text:``) has no mapping to look a default up
        # in; treat it as "no default" instead of failing on the membership test.
        if isinstance(spec, Mapping) and "default" in spec:
            resolved_inputs[input_name] = spec["default"]
        else:
            missing_inputs.append(input_name)

    if missing_inputs:
        raise MissingRequiredInputError(f"Missing required inputs: {missing_inputs}")

    return resolved_inputs
265
+
266
async def __call__(  # pylint: disable=docstring-keyword-should-match-keyword-only
    self,
    **kwargs: Any,
) -> dict:
    """Execute the prompty asynchronously and return the formatted model response.

    Declared inputs are taken from the keyword arguments. A truthy ``timeout`` keyword argument is
    converted to ``float`` and used as the request timeout.

    :keyword kwargs: Input values for the prompty, plus the optional ``timeout``.
    :paramtype kwargs: Any
    :return: The output of the prompty.
    :rtype: ChatCompletion | AsyncStream[ChatCompletionChunk] | AsyncGenerator[str] | str | Mapping[str, Any]
    :raises NotSupportedError: If the configured connection is neither an Azure OpenAI nor an
        OpenAI connection.
    """

    resolved_inputs = self._resolve_inputs(kwargs)
    connection = Connection.parse_from_config(self._model.configuration)
    chat_messages = build_messages(prompt=self._template, working_dir=self.path.parent, **resolved_inputs)
    request_params = prepare_open_ai_request_params(self._model, chat_messages)

    raw_timeout = cast(Any, kwargs.get("timeout", None))
    request_timeout: Optional[float] = float(raw_timeout) if raw_timeout else None

    # The SDK's own retry loop is switched off; retries are driven by _send_with_retries so that
    # every failed attempt is logged as it happens.
    sdk_retries = 0

    headers = {"User-Agent": UserAgentSingleton().value}

    # NOTE(review): a new client is created on every call and is never closed here -- confirm
    # whether it should be closed once the response has been consumed.
    api_client: Union[AsyncAzureOpenAI, AsyncOpenAI]
    if isinstance(connection, AzureOpenAIConnection):
        api_client = AsyncAzureOpenAI(
            azure_endpoint=connection.azure_endpoint,
            api_key=connection.api_key,
            azure_deployment=connection.azure_deployment,
            api_version=connection.api_version,
            max_retries=sdk_retries,
            # Token-based authentication is only configured when no API key is available.
            azure_ad_token_provider=(
                None if connection.api_key else self.get_token_provider(self._token_credential)
            ),
            default_headers=headers,
        )
    elif isinstance(connection, OpenAIConnection):
        api_client = AsyncOpenAI(
            base_url=connection.base_url,
            api_key=connection.api_key,
            organization=connection.organization,
            max_retries=sdk_retries,
            default_headers=headers,
        )
    else:
        raise NotSupportedError(
            f"'{type(connection).__name__}' is not a supported connection type.", target=ErrorTarget.EVAL_RUN
        )

    llm_response: OpenAIChatResponseType = await self._send_with_retries(
        api_client=api_client,
        params=request_params,
        timeout=request_timeout,
    )

    # Only the first choice is returned unless the model section sets ``response`` to another value.
    first_choice_only = self._data.get("model", {}).get("response", "first").lower() == "first"
    return await format_llm_response(
        response=llm_response,
        is_first_choice=first_choice_only,
        response_format=request_params.get("response_format", {}),
        outputs=self._outputs,
        inputs=resolved_inputs,
    )
335
+
336
def render(  # pylint: disable=docstring-keyword-should-match-keyword-only
    self, **kwargs: Any
) -> Sequence[Mapping[str, Any]]:
    """Build the chat messages for the given inputs without sending a request.

    :keyword kwargs: Input values for the prompty.
    :paramtype kwargs: Any
    :return: The messages produced from the prompt template.
    :rtype: Sequence[Mapping[str, Any]]
    """

    template_inputs = self._resolve_inputs(kwargs)
    return build_messages(prompt=self._template, working_dir=self.path.parent, **template_inputs)
350
+
351
async def _send_with_retries(
    self,
    api_client: Union[AsyncAzureOpenAI, AsyncOpenAI],
    params: Mapping[str, Any],
    timeout: Optional[float],
    max_retries: int = 10,
    max_entity_retries: int = 3,
) -> OpenAIChatResponseType:
    """Issue the chat completion request, retrying on retryable OpenAI errors.

    :param Union[AsyncAzureOpenAI, AsyncOpenAI] api_client: The OpenAI client.
    :param Mapping[str, Any] params: The request parameters.
    :param Optional[float] timeout: The timeout for the request; the client default is kept when falsy.
    :param int max_retries: The maximum number of retries.
    :param int max_entity_retries: The maximum number of retries for entity errors.
    :return: The response from OpenAI.
    :rtype: OpenAIChatResponseType
    :raises WrappedOpenAIError: If the request fails and is not retried again.
    """

    client_name: str = api_client.__class__.__name__
    client: Union[AsyncAzureOpenAI, AsyncOpenAI] = api_client.with_options(timeout=timeout or NotGiven())

    # Single-element list handed to openai_error_retryable on every failure; presumably it is
    # used there as a mutable counter of entity-error retries -- verify against the helper.
    entity_retries: List[int] = [0]
    attempt: int = 0
    backoff: Optional[float] = None

    while True:
        # Wait out the delay requested after the previous failed attempt, if any.
        if backoff:
            await asyncio.sleep(backoff)

        try:
            return await client.chat.completions.create(**params)
        except OpenAIError as error:
            retry_again = False
            if attempt < max_retries:
                retry_again, backoff = openai_error_retryable(error, attempt, entity_retries, max_entity_retries)

            if not retry_again:
                self._logger.exception(
                    "[%d/%d] %s request failed. %s: %s",
                    attempt,
                    max_retries,
                    client_name,
                    type(error).__name__,
                    str(error),
                )
                raise WrappedOpenAIError(error=error) from error

            self._logger.warning(
                "[%d/%d] %s request failed. %s: %s. Retrying in %f seconds.",
                attempt,
                max_retries,
                client_name,
                type(error).__name__,
                str(error),
                backoff or 0.0,
                exc_info=True,
            )

        attempt += 1
414
+
415
@staticmethod
def get_token_provider(cred: Union[TokenCredential, AsyncTokenCredential]) -> AsyncAzureADTokenProvider:
    """Wrap an Azure credential in an async callable that returns a bearer token string.

    :param Union[TokenCredential, AsyncTokenCredential] cred: The Azure authentication credential.
    :return: An async callable that requests a token for
        ``TokenScope.COGNITIVE_SERVICES_MANAGEMENT`` each time it is invoked.
    :rtype: AsyncAzureADTokenProvider
    """

    async def _fetch_token() -> str:
        access_token = cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT)
        # The parameter accepts both sync and async credentials, so ``get_token`` may return the
        # token itself or an awaitable resolving to it.
        if isinstance(access_token, Awaitable):
            access_token = await access_token
        return access_token.token

    return _fetch_token