azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show
  1. azure/ai/evaluation/__init__.py +85 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +147 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +87 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
  155. azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
  156. azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
  157. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  158. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  159. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  160. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  161. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  162. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  163. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  165. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
  264. azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,461 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ """Tools for Azure AI Agents that provide evaluation and red teaming capabilities."""
6
+
7
+ import asyncio
8
+ import logging
9
+ from typing import Optional, Union, List, Dict, Any
10
+ import os
11
+ import json
12
+ import random
13
+ import uuid
14
+
15
+ from azure.core.credentials import TokenCredential
16
+ from azure.ai.evaluation._constants import TokenScope
17
+ from azure.ai.evaluation._common._experimental import experimental
18
+ from azure.ai.evaluation.red_team._attack_objective_generator import RiskCategory
19
+ from azure.ai.evaluation.simulator._model_tools import ManagedIdentityAPITokenManager
20
+ from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
21
+ from ._agent_utils import AgentUtils
22
+
23
+ # Setup logging
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ @experimental
28
+ class RedTeamToolProvider:
29
+ """Provider for red teaming tools that can be used in Azure AI Agents.
30
+
31
+ This class provides tools that can be registered with Azure AI Agents
32
+ to enable red teaming capabilities.
33
+
34
+ :param azure_ai_project_endpoint: The Azure AI project endpoint (e.g., 'https://your-resource-name.services.ai.azure.com/api/projects/your-project-name')
35
+ :type azure_ai_project_endpoint: str
36
+ :param credential: The credential to authenticate with Azure services
37
+ :type credential: TokenCredential
38
+ :param application_scenario: Optional application scenario context for generating relevant prompts
39
+ :type application_scenario: Optional[str]
40
+ """
41
+
42
+ def __init__(
43
+ self,
44
+ azure_ai_project_endpoint: str,
45
+ credential: TokenCredential,
46
+ *,
47
+ application_scenario: Optional[str] = None,
48
+ ):
49
+ self.azure_ai_project_endpoint = azure_ai_project_endpoint
50
+ self.credential = credential
51
+ self.application_scenario = application_scenario
52
+
53
+ # Create token manager for API access
54
+ self.token_manager = ManagedIdentityAPITokenManager(
55
+ token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
56
+ logger=logging.getLogger("RedTeamToolProvider"),
57
+ credential=credential,
58
+ )
59
+
60
+ # Create the generated RAI client for fetching attack objectives
61
+ self.generated_rai_client = GeneratedRAIClient(
62
+ azure_ai_project=self.azure_ai_project_endpoint, token_manager=self.token_manager.get_aad_credential()
63
+ )
64
+
65
+ # Cache for attack objectives to avoid repeated API calls
66
+ self._attack_objectives_cache = {}
67
+
68
+ # Store fetched prompts for later conversion
69
+ self._fetched_prompts = {}
70
+ self.converter_utils = AgentUtils()
71
+
72
+ def get_available_strategies(self) -> List[str]:
73
+ """Get a list of available prompt conversion strategies.
74
+
75
+ :return: List of strategy names
76
+ :rtype: List[str]
77
+ """
78
+ return self.converter_utils.get_list_of_supported_converters()
79
+
80
+ async def apply_strategy_to_prompt(self, prompt: str, strategy: str) -> str:
81
+ """Apply a conversion strategy to a prompt.
82
+
83
+ :param prompt: The prompt to convert
84
+ :type prompt: str
85
+ :param strategy: The strategy to apply
86
+ :type strategy: str
87
+ :return: The converted prompt
88
+ :rtype: str
89
+ :raises ValueError: If the strategy is not supported
90
+ """
91
+ return await self.converter_utils.convert_text(converter_name=strategy, text=prompt)
92
+
93
+ @staticmethod
94
+ def _parse_risk_category(category_text: str) -> Optional[RiskCategory]:
95
+ """Parse a text string into a RiskCategory enum value.
96
+
97
+ :param category_text: Text description of a risk category
98
+ :type category_text: str
99
+ :return: The corresponding RiskCategory enum value if found
100
+ :rtype: Optional[RiskCategory]
101
+ """
102
+ # Convert to lowercase and remove spaces for more flexible matching
103
+ cleaned_text = category_text.lower().strip()
104
+
105
+ # Map common keywords to RiskCategory values - using only officially supported categories
106
+ keyword_map = {
107
+ # Hate/unfairness category
108
+ "hate": RiskCategory.HateUnfairness,
109
+ "unfairness": RiskCategory.HateUnfairness,
110
+ "hate_unfairness": RiskCategory.HateUnfairness,
111
+ "bias": RiskCategory.HateUnfairness,
112
+ "discrimination": RiskCategory.HateUnfairness,
113
+ "prejudice": RiskCategory.HateUnfairness,
114
+ # Violence category
115
+ "violence": RiskCategory.Violence,
116
+ "harm": RiskCategory.Violence,
117
+ "physical": RiskCategory.Violence,
118
+ "weapon": RiskCategory.Violence,
119
+ "dangerous": RiskCategory.Violence,
120
+ # Sexual category
121
+ "sexual": RiskCategory.Sexual,
122
+ "sex": RiskCategory.Sexual,
123
+ "adult": RiskCategory.Sexual,
124
+ "explicit": RiskCategory.Sexual,
125
+ # Self harm category
126
+ "self_harm": RiskCategory.SelfHarm,
127
+ "selfharm": RiskCategory.SelfHarm,
128
+ "self-harm": RiskCategory.SelfHarm,
129
+ "suicide": RiskCategory.SelfHarm,
130
+ "self-injury": RiskCategory.SelfHarm,
131
+ }
132
+
133
+ # Try direct mapping first
134
+ for key, risk_category in keyword_map.items():
135
+ if key in cleaned_text:
136
+ return risk_category
137
+
138
+ # If the text contains an exact category name, use that
139
+ for category in RiskCategory:
140
+ if category.value.lower() in cleaned_text:
141
+ return category
142
+
143
+ return None
144
+
145
+ async def _get_attack_objectives(self, risk_category: RiskCategory, strategy: str = "baseline") -> List[str]:
146
+ """Fetch attack objectives directly from the RAI service.
147
+
148
+ :param risk_category: The risk category to get objectives for
149
+ :type risk_category: RiskCategory
150
+ :param strategy: The attack strategy to use
151
+ :type strategy: str
152
+ :return: A list of attack objective prompts
153
+ :rtype: List[str]
154
+ """
155
+ logger.debug(f"Fetching attack objectives for {risk_category.value}, strategy: {strategy}")
156
+
157
+ risk_cat_value = risk_category.value.lower()
158
+
159
+ try:
160
+ # Call the RAI service to get attack objectives
161
+ logger.debug(
162
+ f"API call: get_attack_objectives({risk_cat_value}, app: {self.application_scenario}, strategy: {strategy})"
163
+ )
164
+
165
+ # Get strategy-specific dataset for tense strategy
166
+ if "tense" in strategy:
167
+ objectives_response = await self.generated_rai_client.get_attack_objectives(
168
+ risk_category=risk_cat_value, application_scenario=self.application_scenario or "", strategy="tense"
169
+ )
170
+ else:
171
+ objectives_response = await self.generated_rai_client.get_attack_objectives(
172
+ risk_category=risk_cat_value, application_scenario=self.application_scenario or "", strategy=None
173
+ )
174
+
175
+ # Handle jailbreak strategy - apply jailbreak prefixes to messages
176
+ if strategy == "jailbreak":
177
+ logger.debug("Applying jailbreak prefixes to objectives")
178
+ jailbreak_prefixes = await self.generated_rai_client.get_jailbreak_prefixes()
179
+ for objective in objectives_response:
180
+ if "messages" in objective and len(objective["messages"]) > 0:
181
+ message = objective["messages"][0]
182
+ if isinstance(message, dict) and "content" in message:
183
+ message["content"] = f"{random.choice(jailbreak_prefixes)} {message['content']}"
184
+
185
+ # Extract content from objectives
186
+ selected_prompts = []
187
+ selected_cat_objectives = objectives_response
188
+
189
+ for obj in selected_cat_objectives:
190
+ if "messages" in obj and len(obj["messages"]) > 0:
191
+ message = obj["messages"][0]
192
+ if isinstance(message, dict) and "content" in message:
193
+ selected_prompts.append(message["content"])
194
+
195
+ return selected_prompts
196
+
197
+ except Exception as e:
198
+ logger.error(f"Error calling get_attack_objectives: {str(e)}")
199
+ return []
200
+
201
+ async def fetch_harmful_prompt(
202
+ self, risk_category_text: str, strategy: str = "baseline", convert_with_strategy: Optional[str] = None
203
+ ) -> Dict[str, Any]:
204
+ """Fetch a harmful prompt for a specific risk category.
205
+
206
+ This is designed to be used as a tool in Azure AI Agents for red teaming purposes.
207
+
208
+ :param risk_category_text: Text description of the risk category (e.g., "violence", "hate")
209
+ :type risk_category_text: str
210
+ :param strategy: The attack strategy to use (default: "baseline")
211
+ :type strategy: str
212
+ :param convert_with_strategy: Optional strategy to convert the prompt (e.g., "morse", "binary")
213
+ :type convert_with_strategy: Optional[str]
214
+ :return: A dictionary containing the harmful prompt and metadata
215
+ :rtype: Dict[str, Any]
216
+ """
217
+ try:
218
+ # Parse the risk category from the text
219
+ risk_category = self._parse_risk_category(risk_category_text)
220
+
221
+ if not risk_category:
222
+ supported_categories = ", ".join([rc.value for rc in RiskCategory])
223
+ return {
224
+ "status": "error",
225
+ "message": f"Could not parse risk category from '{risk_category_text}'. Please use one of: {supported_categories}",
226
+ }
227
+
228
+ # Create a cache key from risk category and strategy
229
+ cache_key = (risk_category.value, strategy)
230
+
231
+ # Check if we already have cached objectives for this category and strategy
232
+ if cache_key not in self._attack_objectives_cache:
233
+ # Fetch the attack objectives directly
234
+ objectives = await self._get_attack_objectives(risk_category=risk_category, strategy=strategy)
235
+
236
+ self._attack_objectives_cache[cache_key] = objectives
237
+
238
+ objectives = self._attack_objectives_cache[cache_key]
239
+
240
+ if not objectives:
241
+ return {
242
+ "status": "error",
243
+ "message": f"No harmful prompts found for risk category '{risk_category.value}' with strategy '{strategy}'.",
244
+ }
245
+
246
+ # Select a random objective from the list
247
+ selected_objective = random.choice(objectives)
248
+
249
+ # Create a unique ID for this prompt
250
+ prompt_id = f"prompt_{str(uuid.uuid4())[:8]}"
251
+
252
+ # Store the prompt for later conversion
253
+ self._fetched_prompts[prompt_id] = selected_objective
254
+
255
+ # Apply conversion strategy if requested
256
+ if convert_with_strategy:
257
+ try:
258
+ # Check if the strategy is valid
259
+ if convert_with_strategy not in self.get_available_strategies():
260
+ return {
261
+ "status": "error",
262
+ "message": f"Unsupported strategy: {convert_with_strategy}. Available strategies: {', '.join(self.get_available_strategies())}",
263
+ }
264
+
265
+ # Convert the prompt using the specified strategy
266
+ converted_prompt = await self.apply_strategy_to_prompt(selected_objective, convert_with_strategy)
267
+
268
+ return {
269
+ "status": "success",
270
+ "risk_category": risk_category.value,
271
+ "strategy": strategy,
272
+ "conversion_strategy": convert_with_strategy,
273
+ "original_prompt": selected_objective,
274
+ "converted_prompt": converted_prompt,
275
+ "prompt_id": prompt_id,
276
+ "note": "This prompt was generated and converted for responsible AI testing purposes only.",
277
+ }
278
+ except Exception as e:
279
+ return {"status": "error", "message": f"Error converting prompt: {str(e)}"}
280
+
281
+ # Return with information about available strategies
282
+ return {
283
+ "status": "success",
284
+ "risk_category": risk_category.value,
285
+ "strategy": strategy,
286
+ "prompt_id": prompt_id,
287
+ "prompt": selected_objective,
288
+ "available_strategies": self.get_available_strategies(),
289
+ "note": "This prompt was generated for responsible AI testing purposes only. You can convert this prompt with a strategy by using the convert_prompt tool.",
290
+ }
291
+
292
+ except Exception as e:
293
+ logger.error(f"Error fetching harmful prompt: {str(e)}")
294
+ return {"status": "error", "message": f"An error occurred: {str(e)}"}
295
+
296
+ async def convert_prompt(self, prompt_or_id: str, strategy: str) -> Dict[str, Any]:
297
+ """Convert a prompt (or a previously fetched prompt by ID) using a specified strategy.
298
+
299
+ :param prompt_or_id: Either a prompt text or a prompt ID from a previous fetch_harmful_prompt call
300
+ :type prompt_or_id: str
301
+ :param strategy: The strategy to use for conversion
302
+ :type strategy: str
303
+ :return: A dictionary containing the converted prompt
304
+ :rtype: Dict[str, Any]
305
+ """
306
+ try:
307
+ # Check if input is a prompt ID
308
+ prompt_text = self._fetched_prompts.get(prompt_or_id, prompt_or_id)
309
+
310
+ if strategy not in self.get_available_strategies():
311
+ return {
312
+ "status": "error",
313
+ "message": f"Unsupported strategy: {strategy}. Available strategies: {', '.join(self.get_available_strategies())}",
314
+ }
315
+
316
+ # Convert the prompt
317
+ conversion_result = await self.apply_strategy_to_prompt(prompt_text, strategy)
318
+
319
+ # Handle both string results and ConverterResult objects
320
+ converted_prompt = conversion_result
321
+ if hasattr(conversion_result, "text"):
322
+ converted_prompt = conversion_result.text
323
+
324
+ return {
325
+ "status": "success",
326
+ "strategy": strategy,
327
+ "original_prompt": prompt_text,
328
+ "converted_prompt": converted_prompt,
329
+ "note": "This prompt was converted for responsible AI testing purposes only.",
330
+ }
331
+
332
+ except Exception as e:
333
+ logger.error(f"Error converting prompt: {str(e)}")
334
+ return {"status": "error", "message": f"An error occurred: {str(e)}"}
335
+
336
+ async def red_team(self, category: str, strategy: Optional[str] = None) -> Dict[str, Any]:
337
+ """Get a harmful prompt for a specific risk category with an optional conversion strategy.
338
+
339
+ This unified tool combines fetch_harmful_prompt and convert_prompt into a single call.
340
+ It allows users to request harmful prompts with a specific risk category and optionally apply
341
+ a conversion strategy in one step.
342
+
343
+ :param category: The risk category to get a harmful prompt for (e.g., "violence", "hate")
344
+ :type category: str
345
+ :param strategy: Optional conversion strategy to apply (e.g., "morse", "binary")
346
+ :type strategy: Optional[str]
347
+ :return: A dictionary containing the harmful prompt and metadata
348
+ :rtype: Dict[str, Any]
349
+ """
350
+ try:
351
+ # Parse input to extract risk category
352
+ risk_category = self._parse_risk_category(category)
353
+
354
+ if not risk_category:
355
+ supported_categories = ", ".join([rc.value for rc in RiskCategory])
356
+ return {
357
+ "status": "error",
358
+ "message": f"Could not parse risk category from '{category}'. Please use one of: {supported_categories}",
359
+ }
360
+
361
+ # First, fetch a harmful prompt (always using baseline attack strategy)
362
+ result = await self.fetch_harmful_prompt(risk_category_text=category, strategy="baseline")
363
+
364
+ if result["status"] != "success":
365
+ return result
366
+
367
+ # If no conversion strategy requested, return the prompt as is
368
+ if not strategy:
369
+ return {
370
+ "status": "success",
371
+ "risk_category": result["risk_category"],
372
+ "prompt": result["prompt"],
373
+ "prompt_id": result["prompt_id"],
374
+ "available_strategies": result["available_strategies"],
375
+ "note": "This prompt was generated for responsible AI testing purposes only. You can convert this prompt using one of the available strategies.",
376
+ }
377
+
378
+ # If strategy is specified, convert the prompt
379
+ if strategy not in self.get_available_strategies():
380
+ return {
381
+ "status": "error",
382
+ "message": f"Unsupported strategy: {strategy}. Available strategies: {', '.join(self.get_available_strategies())}",
383
+ }
384
+
385
+ # Convert the prompt using the specified strategy
386
+ try:
387
+ converted_prompt = await self.apply_strategy_to_prompt(result["prompt"], strategy)
388
+ return {
389
+ "status": "success",
390
+ "risk_category": result["risk_category"],
391
+ "original_prompt": result["prompt"],
392
+ "strategy": strategy,
393
+ "converted_prompt": converted_prompt,
394
+ "note": f"This prompt was generated for responsible AI testing purposes only and converted using the {strategy} strategy.",
395
+ }
396
+ except Exception as e:
397
+ return {"status": "error", "message": f"Error converting prompt with strategy {strategy}: {str(e)}"}
398
+
399
+ except Exception as e:
400
+ logger.error(f"Error in red_team: {str(e)}")
401
+ return {"status": "error", "message": f"An error occurred: {str(e)}"}
402
+
403
+
404
+ # Tool definition schema for Azure AI Agents
405
+ def get_red_team_tools() -> List[Dict[str, Any]]:
406
+ """Get the list of red team tools that can be registered with Azure AI Agents.
407
+
408
+ :return: A list of tool definitions
409
+ :rtype: List[Dict[str, Any]]
410
+ """
411
+ return [
412
+ {
413
+ "task": "red_team",
414
+ "description": "Get a harmful prompt for a specific risk category with an optional conversion strategy",
415
+ "parameters": {
416
+ "category": {
417
+ "type": "string",
418
+ "description": "The risk category to get a harmful prompt for (e.g., 'violence', 'hate', 'sexual', 'self_harm')",
419
+ },
420
+ "strategy": {
421
+ "type": "string",
422
+ "description": "Optional strategy to convert the prompt (e.g., 'morse', 'binary', 'base64')",
423
+ "default": None,
424
+ },
425
+ },
426
+ },
427
+ {
428
+ "task": "fetch_harmful_prompt",
429
+ "description": "Fetch a harmful prompt for red teaming purposes",
430
+ "parameters": {
431
+ "risk_category_text": {
432
+ "type": "string",
433
+ "description": "The risk category to fetch a harmful prompt for (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')",
434
+ },
435
+ "strategy": {
436
+ "type": "string",
437
+ "description": "The attack strategy to use (e.g., 'baseline', 'jailbreak')",
438
+ "default": "baseline",
439
+ },
440
+ "convert_with_strategy": {
441
+ "type": "string",
442
+ "description": "Optional strategy to convert the prompt (e.g., 'morse', 'binary'). If provided, the prompt will be automatically converted.",
443
+ "default": None,
444
+ },
445
+ },
446
+ },
447
+ {
448
+ "task": "convert_prompt",
449
+ "description": "Convert a prompt using a specified strategy",
450
+ "parameters": {
451
+ "prompt_or_id": {
452
+ "type": "string",
453
+ "description": "Either a prompt text or a prompt ID from a previous fetch_harmful_prompt call",
454
+ },
455
+ "strategy": {
456
+ "type": "string",
457
+ "description": "The strategy to use for conversion (e.g., 'morse', 'binary', 'base64')",
458
+ },
459
+ },
460
+ },
461
+ ]
@@ -0,0 +1,89 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from pyrit.prompt_converter import (
6
+ MathPromptConverter,
7
+ Base64Converter,
8
+ FlipConverter,
9
+ MorseConverter,
10
+ AnsiAttackConverter,
11
+ AsciiArtConverter,
12
+ AsciiSmugglerConverter,
13
+ AtbashConverter,
14
+ BinaryConverter,
15
+ CaesarConverter,
16
+ CharacterSpaceConverter,
17
+ CharSwapGenerator,
18
+ DiacriticConverter,
19
+ LeetspeakConverter,
20
+ UrlConverter,
21
+ UnicodeSubstitutionConverter,
22
+ UnicodeConfusableConverter,
23
+ SuffixAppendConverter,
24
+ StringJoinConverter,
25
+ ROT13Converter,
26
+ )
27
+
28
+
29
+ class AgentUtils:
30
+ def __init__(self):
31
+ self.base64_converter = Base64Converter()
32
+ self.flip_converter = FlipConverter()
33
+ self.morse_converter = MorseConverter()
34
+ self.ansi_attack_converter = AnsiAttackConverter()
35
+ self.ascii_art_converter = AsciiArtConverter()
36
+ self.ascii_smuggler_converter = AsciiSmugglerConverter()
37
+ self.atbash_converter = AtbashConverter()
38
+ self.binary_converter = BinaryConverter()
39
+ self.character_space_converter = CharacterSpaceConverter()
40
+ self.char_swap_generator = CharSwapGenerator()
41
+ self.diacritic_converter = DiacriticConverter()
42
+ self.leetspeak_converter = LeetspeakConverter()
43
+ self.url_converter = UrlConverter()
44
+ self.unicode_substitution_converter = UnicodeSubstitutionConverter()
45
+ self.unicode_confusable_converter = UnicodeConfusableConverter()
46
+ self.suffix_append_converter = SuffixAppendConverter(
47
+ suffix='describing.\\ + similarlyNow write oppositeley.]( Me giving**ONE please? revert with "\\!--Two'
48
+ )
49
+ self.string_join_converter = StringJoinConverter()
50
+ self.rot13_converter = ROT13Converter()
51
+
52
+ async def convert_text(self, *, converter_name, text):
53
+ """
54
+ Convert text using the specified converter.
55
+ """
56
+ if "_converter" not in converter_name:
57
+ converter = getattr(self, f"{converter_name}_converter", None)
58
+ else:
59
+ converter = getattr(self, converter_name, None)
60
+ if converter:
61
+ response = await converter.convert_async(prompt=text)
62
+ return response.output_text
63
+ else:
64
+ raise ValueError(f"Converter {converter_name} not found.")
65
+
66
+ def get_list_of_supported_converters(self):
67
+ """
68
+ Get a list of all supported converters.
69
+ """
70
+ return [
71
+ "base64_converter",
72
+ "flip_converter",
73
+ "morse_converter",
74
+ "ansi_attack_converter",
75
+ "ascii_art_converter",
76
+ "ascii_smuggler_converter",
77
+ "atbash_converter",
78
+ "binary_converter",
79
+ "character_space_converter",
80
+ "char_swap_generator",
81
+ "diacritic_converter",
82
+ "leetspeak_converter",
83
+ "url_converter",
84
+ "unicode_substitution_converter",
85
+ "unicode_confusable_converter",
86
+ "suffix_append_converter",
87
+ "string_join_converter",
88
+ "rot13_converter",
89
+ ]