azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (277)
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,228 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ """
+ This module provides a Semantic Kernel plugin for Red Team Tools.
+ Its functions can be used in a Semantic Kernel agent for red teaming purposes.
+ """
+
+ import asyncio
+ import json
+ from typing import Annotated, Dict, Any, Optional, Callable
+
+ from semantic_kernel.functions import kernel_function
+
+ from azure.ai.evaluation.red_team._agent._agent_tools import RedTeamToolProvider
+ from azure.identity import DefaultAzureCredential
+
+
+ class RedTeamPlugin:
+     """
+     A Semantic Kernel plugin that provides red teaming capabilities.
+     This plugin wraps the RedTeamToolProvider to expose red teaming functions
+     as Semantic Kernel functions.
+
+     Example:
+     ```python
+     # Method 1: Create a plugin with individual environment variables
+     plugin = RedTeamPlugin(
+         azure_ai_project_endpoint=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
+         target_func=lambda x: "Target model response"
+     )
+
+     # Create a Semantic Kernel agent with the plugin
+     agent = ChatCompletionAgent(
+         service=service,
+         name="RedTeamAgent",
+         instructions="You are a red team agent...",
+         plugins=[plugin],
+     )
+     ```
+     """
+
+     def __init__(
+         self,
+         azure_ai_project_endpoint: str,
+         target_func: Optional[Callable[[str], str]] = None,
+         *,
+         application_scenario: str = "",
+         **kwargs,
+     ):
+         """
+         Initialize the RedTeamPlugin with the necessary configuration components.
+
+         :param azure_ai_project_endpoint: The Azure AI project endpoint (e.g., 'https://your-resource-name.services.ai.azure.com/api/projects/your-project-name')
+         :param target_func: Optional function to call with prompts
+         :param application_scenario: The application scenario for the tool provider
+         """
+
+         # Initialize credential and tool provider
+         self.credential = DefaultAzureCredential()
+         self.tool_provider = RedTeamToolProvider(
+             azure_ai_project_endpoint=azure_ai_project_endpoint,
+             credential=self.credential,
+             application_scenario=application_scenario,
+         )
+
+         # Store the target function
+         self.target_function = target_func
+
+         # Dictionary to store fetched prompts for reference
+         self.fetched_prompts = {}
+
+     @kernel_function(description="Fetch a harmful prompt for a specific risk category to test content filters")
+     async def fetch_harmful_prompt(
+         self,
+         risk_category: Annotated[str, "The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')"],
+         strategy: Annotated[str, "Attack strategy to use (e.g., 'baseline', 'jailbreak')"] = "baseline",
+         convert_with_strategy: Annotated[str, "Optional strategy to convert the prompt"] = "",
+     ) -> Annotated[str, "A JSON string with the harmful prompt and metadata"]:
+         """
+         Fetch a harmful prompt for a specific risk category to test content filters.
+
+         :param risk_category: The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')
+         :param strategy: Attack strategy to use (e.g., 'baseline', 'jailbreak')
+         :param convert_with_strategy: Optional strategy to convert the prompt
+         :return: A JSON string with the harmful prompt and metadata
+         """
+         # Convert empty string to None
+         if not convert_with_strategy:
+             convert_with_strategy = None
+
+         # Directly await the async method instead of using asyncio.run()
+         result = await self.tool_provider.fetch_harmful_prompt(
+             risk_category_text=risk_category, strategy=strategy, convert_with_strategy=convert_with_strategy
+         )
+
+         # Store the prompt for later conversion if successful
+         if result["status"] == "success" and "prompt_id" in result:
+             prompt_id = result["prompt_id"]
+             if "prompt" in result:
+                 self.fetched_prompts[prompt_id] = result["prompt"]
+                 # Also update the tool provider's cache
+                 self.tool_provider._fetched_prompts[prompt_id] = result["prompt"]
+
+         return json.dumps(result)
+
+     @kernel_function(description="Convert a prompt using a specified strategy")
+     async def convert_prompt(
+         self,
+         prompt_or_id: Annotated[str, "Either a prompt text or a prompt ID from a previous fetch"],
+         strategy: Annotated[str, "The strategy to use for conversion"],
+     ) -> Annotated[str, "A JSON string with the original and converted prompt"]:
+         """
+         Convert a prompt or a previously fetched prompt ID using a specified strategy.
+
+         :param prompt_or_id: Either a prompt text or a prompt ID from a previous fetch
+         :param strategy: The strategy to use for conversion
+         :return: A JSON string with the original and converted prompt
+         """
+         # Check if input is a prompt ID we have stored
+         if prompt_or_id in self.fetched_prompts:
+             # Update the provider's cache
+             self.tool_provider._fetched_prompts[prompt_or_id] = self.fetched_prompts[prompt_or_id]
+
+         # Directly await the async method instead of using asyncio.run()
+         result = await self.tool_provider.convert_prompt(prompt_or_id=prompt_or_id, strategy=strategy)
+
+         return json.dumps(result)
+
+     @kernel_function(description="Get a harmful prompt for a specific risk category and optionally convert it")
+     async def red_team_unified(
+         self,
+         category: Annotated[str, "The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')"],
+         strategy: Annotated[str, "Optional strategy to convert the prompt"] = "",
+     ) -> Annotated[str, "A JSON string with the harmful prompt and metadata"]:
+         """
+         Get a harmful prompt for a specific risk category and optionally convert it.
+
+         :param category: The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')
+         :param strategy: Optional strategy to convert the prompt
+         :return: A JSON string with the harmful prompt and metadata
+         """
+         # Convert empty string to None
+         strategy_param = strategy if strategy else None
+
+         # Directly await the async method instead of using asyncio.run()
+         result = await self.tool_provider.red_team(category=category, strategy=strategy_param)
+
+         # Store the prompt for later conversion if it's a success and we didn't convert it
+         if result["status"] == "success":
+             if "prompt_id" in result and "prompt" in result:
+                 prompt_id = result["prompt_id"]
+                 self.fetched_prompts[prompt_id] = result["prompt"]
+                 # Also update the tool provider's cache
+                 self.tool_provider._fetched_prompts[prompt_id] = result["prompt"]
+
+         return json.dumps(result)
+
+     @kernel_function(description="Get a list of all available prompt conversion strategies")
+     async def get_available_strategies(self) -> Annotated[str, "A JSON string with available conversion strategies"]:
+         """
+         Get a list of all available prompt conversion strategies.
+
+         :return: A JSON string with available conversion strategies
+         """
+         # This method calls a synchronous function, but we make the method itself async
+         # for consistency with the rest of the interface
+         strategies = self.tool_provider.get_available_strategies()
+
+         return json.dumps({"status": "success", "available_strategies": strategies})
+
+     @kernel_function(description="Explain the purpose and responsible use of red teaming tools")
+     async def explain_purpose(self) -> Annotated[str, "A JSON string with information about red teaming tools"]:
+         """
+         Explain the purpose and responsible use of red teaming tools.
+
+         :return: A JSON string with information about red teaming tools
+         """
+         # This method doesn't use any async functions, but we make it async
+         # for consistency with the rest of the interface
+         explanation = {
+             "purpose": "Red teaming tools are designed to test AI systems for vulnerabilities and safety issues.",
+             "responsible_use": [
+                 "Only use these tools on systems you have permission to test.",
+                 "Use the results to improve system safety and robustness.",
+                 "Do not use these tools to cause harm or bypass legitimate safety measures in production systems.",
+             ],
+             "risk_categories": {
+                 "violence": "Content related to physical harm, weapons, or dangerous activities",
+                 "hate_unfairness": "Content related to discrimination, bias, or prejudice",
+                 "sexual": "Explicit sexual content",
+                 "self_harm": "Content related to suicide, self-injury, or harmful behaviors",
+             },
+             "conversion_strategies": "Various encoding and obfuscation methods to test if harmful content can bypass detection",
+         }
+
+         return json.dumps(explanation)
+
+     @kernel_function(description="Send a prompt to the target function and return the response")
+     async def send_to_target(
+         self, prompt: Annotated[str, "The prompt text to send to the target function"]
+     ) -> Annotated[str, "A JSON string with the response from the target"]:
+         """
+         Send a prompt to the target function and return the response.
+
+         :param prompt: The prompt text to send to the target function
+         :return: A JSON string with the response from the target
+         """
+         # This method doesn't use any async functions, but we make it async
+         # for consistency with the rest of the interface
+         if self.target_function is None:
+             return json.dumps(
+                 {
+                     "status": "error",
+                     "message": "Target function not initialized. Make sure to pass a target_func when initializing the plugin.",
+                 }
+             )
+
+         try:
+             # Call the target function with the prompt
+             response = self.target_function(prompt)
+
+             return json.dumps({"status": "success", "prompt": prompt, "response": response})
+         except Exception as e:
+             return json.dumps(
+                 {"status": "error", "message": f"Error calling target function: {str(e)}", "prompt": prompt}
+             )
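
For reference, the kernel functions above are ordinary async methods and can be awaited directly, outside of an agent loop. A minimal sketch, assuming a valid project endpoint in the AZURE_AI_PROJECT_ENDPOINT environment variable and working Azure credentials; the echo target is a placeholder, and which strategies are available depends on the service:

```python
import asyncio
import json
import os

from azure.ai.evaluation.red_team._agent._semantic_kernel_plugin import RedTeamPlugin


async def main():
    # Endpoint and target below are illustrative placeholders.
    plugin = RedTeamPlugin(
        azure_ai_project_endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
        target_func=lambda prompt: f"Echoed: {prompt}",
    )

    # Fetch a baseline harmful prompt for one risk category; the result is a JSON string.
    raw = await plugin.fetch_harmful_prompt(risk_category="violence")
    result = json.loads(raw)

    # If the fetch succeeded, convert the cached prompt by ID with an encoding strategy.
    if result.get("status") == "success" and "prompt_id" in result:
        converted = await plugin.convert_prompt(prompt_or_id=result["prompt_id"], strategy="base64")
        print(converted)

    # Relay an arbitrary prompt to the wrapped target function.
    print(await plugin.send_to_target(prompt="Hello"))


asyncio.run(main())
```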
@@ -0,0 +1,268 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ import json
+ import logging
+ import os
+ from enum import Enum
+ from typing import List, Optional, Dict, Any
+ from pathlib import Path
+ from azure.ai.evaluation._common._experimental import experimental
+
+
+ @experimental
+ class RiskCategory(str, Enum):
+     """Risk categories for attack objectives."""
+
+     HateUnfairness = "hate_unfairness"
+     Violence = "violence"
+     Sexual = "sexual"
+     SelfHarm = "self_harm"
+     ProtectedMaterial = "protected_material"
+     CodeVulnerability = "code_vulnerability"
+     UngroundedAttributes = "ungrounded_attributes"
+     SensitiveDataLeakage = "sensitive_data_leakage"  # Agent targets only
+     TaskAdherence = "task_adherence"  # Agent targets only
+     ProhibitedActions = "prohibited_actions"  # Agent targets only
+
+
+ @experimental
+ class SupportedLanguages(Enum):
+     """Supported languages for attack objectives, using ISO standard language codes."""
+
+     Spanish = "es"
+     Italian = "it"
+     French = "fr"
+     German = "de"
+     SimplifiedChinese = "zh-cn"
+     Portuguese = "pt"
+     Japanese = "ja"
+     English = "en"
+     Korean = "ko"
+
+
+ @experimental
+ class _InternalRiskCategory(str, Enum):
+     ECI = "eci"
+
+
+ class _AttackObjectiveGenerator:
+     """Generator for creating attack objectives.
+
+     :param risk_categories: List of risk categories to generate attack objectives for (optional if custom_attack_seed_prompts is provided)
+     :type risk_categories: Optional[List[RiskCategory]]
+     :param num_objectives: Number of objectives to generate per risk category
+     :type num_objectives: int
+     :param application_scenario: Description of the application scenario for context
+     :type application_scenario: Optional[str]
+     :param custom_attack_seed_prompts: Path to a JSON file containing custom attack seed prompts (can be absolute or relative path)
+     :type custom_attack_seed_prompts: Optional[str]
+     """
+
+     def __init__(
+         self,
+         risk_categories: Optional[List[RiskCategory]] = None,
+         num_objectives: int = 10,
+         application_scenario: Optional[str] = None,
+         custom_attack_seed_prompts: Optional[str] = None,
+     ):
+         self.risk_categories = risk_categories or []
+         self.num_objectives = num_objectives
+         self.application_scenario = application_scenario
+         self.custom_attack_seed_prompts = custom_attack_seed_prompts
+         self.logger = logging.getLogger("_AttackObjectiveGenerator")
+
+         # If custom_attack_seed_prompts is provided, validate and load them
+         self.custom_prompts = None
+         self.validated_prompts = []
+         self.valid_prompts_by_category = {}
+
+         if custom_attack_seed_prompts:
+             self._load_and_validate_custom_prompts()
+
+     def _load_and_validate_custom_prompts(self) -> None:
+         """Load and validate custom attack seed prompts from the provided file path."""
+         if not self.custom_attack_seed_prompts:
+             return
+
+         # Handle both absolute and relative paths
+         custom_prompts_path = Path(self.custom_attack_seed_prompts)
+
+         # Convert to absolute path if it's a relative path
+         if not custom_prompts_path.is_absolute():
+             self.logger.info(f"Converting relative path '{custom_prompts_path}' to absolute path")
+             custom_prompts_path = Path.cwd() / custom_prompts_path
+
+         self.logger.debug(f"Using absolute path: {custom_prompts_path}")
+
+         # Check if the file exists
+         if not custom_prompts_path.exists():
+             raise ValueError(f"Custom attack seed prompts file not found: {custom_prompts_path}")
+
+         try:
+             # Load JSON file
+             with open(custom_prompts_path, "r", encoding="utf-8") as f:
+                 self.custom_prompts = json.load(f)
+
+             # Validate that it's a list
+             if not isinstance(self.custom_prompts, list):
+                 raise ValueError(
+                     f"Custom attack seed prompts must be a JSON array, got {type(self.custom_prompts)}, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                 )
+
+             self.logger.info(f"Loaded {len(self.custom_prompts)} prompts from {self.custom_attack_seed_prompts}")
+
+             # Initialize dictionary for categorized prompts
+             for risk_category in RiskCategory:
+                 self.valid_prompts_by_category[risk_category.value] = []
+
+             # Process each prompt and validate format
+             valid_prompts_count = 0
+             invalid_prompts_count = 0
+             for i, prompt in enumerate(self.custom_prompts):
+                 try:
+                     # Check required fields
+                     if not isinstance(prompt, dict):
+                         self.logger.warning(f"Skipping prompt {i}: not a JSON object")
+                         continue
+
+                     if "metadata" not in prompt:
+                         self.logger.warning(f"Skipping prompt {i}: missing 'metadata' field")
+                         continue
+
+                     if "messages" not in prompt or not prompt["messages"]:
+                         self.logger.warning(f"Skipping prompt {i}: missing or empty 'messages' field")
+                         continue
+
+                     # Check metadata structure
+                     metadata = prompt["metadata"]
+                     if not isinstance(metadata, dict):
+                         self.logger.warning(
+                             f"Skipping prompt {i}: 'metadata' is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     if "target_harms" not in metadata or not metadata["target_harms"]:
+                         self.logger.warning(
+                             f"Skipping prompt {i}: missing or empty 'target_harms' in metadata, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     # Check target_harms structure
+                     valid_risk_types = {cat.value for cat in RiskCategory}
+                     valid_risk_found = False
+                     prompt_categories = []
+
+                     for harm in metadata["target_harms"]:
+                         if not isinstance(harm, dict):
+                             self.logger.warning(
+                                 f"Skipping harm in prompt {i}: not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                             )
+                             continue
+
+                         if "risk-type" not in harm:
+                             self.logger.warning(
+                                 f"Skipping harm in prompt {i}: missing 'risk-type' field, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                             )
+                             continue
+
+                         risk_type = harm.get("risk-type", "")
+                         if risk_type not in valid_risk_types:
+                             self.logger.warning(
+                                 f"Skipping harm in prompt {i}: invalid risk-type '{risk_type}'. Valid types: {valid_risk_types}. see https://aka.ms/airedteamingagent-howtodoc for more information"
+                             )
+                             continue
+
+                         prompt_categories.append(risk_type)
+                         valid_risk_found = True
+
+                     if not valid_risk_found:
+                         self.logger.warning(
+                             f"Skipping prompt {i}: no valid risk types found. See https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     # Check messages structure
+                     messages = prompt["messages"]
+                     if not isinstance(messages, list) or not messages:
+                         self.logger.warning(
+                             f"Skipping prompt {i}: 'messages' is not a list or is empty, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     message = messages[0]
+                     if not isinstance(message, dict):
+                         self.logger.warning(
+                             f"Skipping prompt {i}: first message is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     if "role" not in message or message["role"] != "user":
+                         self.logger.warning(
+                             f"Skipping prompt {i}: first message must have role='user', see https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     if "content" not in message or not message["content"]:
+                         self.logger.warning(
+                             f"Skipping prompt {i}: first message missing or empty 'content', see https://aka.ms/airedteamingagent-howtodoc for more information"
+                         )
+                         continue
+
+                     # If we got here, the prompt is valid
+                     self.validated_prompts.append(prompt)
+                     valid_prompts_count += 1
+
+                     # Add to the appropriate categories
+                     for category in prompt_categories:
+                         self.valid_prompts_by_category[category].append(prompt)
+
+                 except Exception as e:
+                     self.logger.warning(f"Error validating prompt {i}: {str(e)}")
+                     invalid_prompts_count += 1
+
+             # Check if we have at least one valid prompt
+             if valid_prompts_count == 0:
+                 raise ValueError(
+                     "No valid prompts found in custom attack seed prompts file. See https://aka.ms/airedteamingagent-howtodoc for more information"
+                 )
+
+             self.logger.info(f"Loaded {valid_prompts_count} valid prompts from custom attack seed prompts file")
+
+             if invalid_prompts_count > 0:
+                 self.logger.warning(f"Skipped {invalid_prompts_count} invalid prompts")
+
+             # Log the breakdown by risk category
+             category_counts = {
+                 cat: len(prompts) for cat, prompts in self.valid_prompts_by_category.items() if len(prompts) > 0
+             }
+             self.logger.info(f"Prompt distribution by risk category: {category_counts}")
+
+             # Merge risk categories from custom prompts with explicitly provided risk_categories
+             categories_with_prompts = [cat for cat, prompts in self.valid_prompts_by_category.items() if prompts]
+             categories_from_prompts = [RiskCategory(cat) for cat in categories_with_prompts]
+
+             if self.risk_categories:
+                 # Combine explicitly provided categories with those from custom prompts
+                 combined_categories = list(set(self.risk_categories + categories_from_prompts))
+                 self.logger.info(
+                     f"Merging provided risk categories {[cat.value for cat in self.risk_categories]} "
+                     f"with categories from custom prompts {[cat.value for cat in categories_from_prompts]} "
+                     f"-> Combined: {[cat.value for cat in combined_categories]}"
+                 )
+                 self.risk_categories = combined_categories
+             else:
+                 # No risk categories provided, use only those from custom prompts
+                 self.risk_categories = categories_from_prompts
+                 self.logger.info(
+                     f"Automatically set risk categories based on valid prompts: {[cat.value for cat in self.risk_categories]}"
+                 )
+
+         except json.JSONDecodeError as e:
+             raise ValueError(
+                 f"Failed to parse custom attack seed prompts file: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information"
+             )
+         except Exception as e:
+             raise ValueError(
+                 f"Error loading custom attack seed prompts: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information"
+             )
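
The validation rules above imply a concrete file format: a JSON array of objects, each with a metadata.target_harms list whose entries carry a "risk-type" matching a RiskCategory value, and a messages list whose first entry has role="user" and non-empty content. A minimal sketch of a passing file, assuming the private module path shown in this diff; the prompt text is a placeholder:

```python
import json
import tempfile

from azure.ai.evaluation.red_team._attack_objective_generator import (
    RiskCategory,
    _AttackObjectiveGenerator,
)

# One seed prompt in the shape the validator accepts.
seed_prompts = [
    {
        "metadata": {"target_harms": [{"risk-type": RiskCategory.Violence.value}]},
        "messages": [{"role": "user", "content": "<harmful prompt text>"}],
    }
]

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(seed_prompts, f)
    path = f.name

# When no risk_categories are passed, they are inferred from the valid prompts.
generator = _AttackObjectiveGenerator(custom_attack_seed_prompts=path)
print([cat.value for cat in generator.risk_categories])  # ['violence']
```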
@@ -0,0 +1,49 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ from enum import Enum
+ from typing import List
+ from azure.ai.evaluation._common._experimental import experimental
+
+
+ @experimental
+ class AttackStrategy(Enum):
+     """Strategies for attacks."""
+
+     EASY = "easy"
+     MODERATE = "moderate"
+     DIFFICULT = "difficult"
+     AnsiAttack = "ansi_attack"
+     AsciiArt = "ascii_art"
+     AsciiSmuggler = "ascii_smuggler"
+     Atbash = "atbash"
+     Base64 = "base64"
+     Binary = "binary"
+     Caesar = "caesar"
+     CharacterSpace = "character_space"
+     CharSwap = "char_swap"
+     Diacritic = "diacritic"
+     Flip = "flip"
+     Leetspeak = "leetspeak"
+     Morse = "morse"
+     ROT13 = "rot13"
+     SuffixAppend = "suffix_append"
+     StringJoin = "string_join"
+     Tense = "tense"
+     UnicodeConfusable = "unicode_confusable"
+     UnicodeSubstitution = "unicode_substitution"
+     Url = "url"
+     Baseline = "baseline"
+     Jailbreak = "jailbreak"
+     MultiTurn = "multi_turn"
+     Crescendo = "crescendo"
+     IndirectJailbreak = "indirect_jailbreak"
+
+     @classmethod
+     def Compose(cls, items: List["AttackStrategy"]) -> List["AttackStrategy"]:
+         for item in items:
+             if not isinstance(item, cls):
+                 raise ValueError("All items must be instances of AttackStrategy")
+         if len(items) > 2:
+             raise ValueError("Composed strategies must have at most 2 items")
+         return items
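
Note that Compose does not merge anything: it only validates that every item is an AttackStrategy member and that the list holds at most two, then returns the list itself. A short sketch, assuming the private module path shown in this diff:

```python
from azure.ai.evaluation.red_team._attack_strategy import AttackStrategy

# A composed strategy is just a validated list of at most two members.
composed = AttackStrategy.Compose([AttackStrategy.Base64, AttackStrategy.ROT13])
print([s.value for s in composed])  # ['base64', 'rot13']

# Both checks raise ValueError: non-members, and lists longer than two.
try:
    AttackStrategy.Compose([AttackStrategy.Base64, AttackStrategy.Caesar, AttackStrategy.Url])
except ValueError as e:
    print(e)  # Composed strategies must have at most 2 items
```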
@@ -0,0 +1,115 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ import logging
+ from typing import Any, Callable, Dict, List, Optional
+
+ from pyrit.models import (
+     PromptRequestResponse,
+     construct_response_from_request,
+ )
+ from pyrit.prompt_target import PromptChatTarget
+
+ logger = logging.getLogger(__name__)
+
+
+ class _CallbackChatTarget(PromptChatTarget):
+     def __init__(
+         self,
+         *,
+         callback: Callable[[List[Dict], bool, Optional[str], Optional[Dict[str, Any]]], Dict],
+         stream: bool = False,
+     ) -> None:
+         """
+         Initializes an instance of the _CallbackChatTarget class.
+
+         It is intended to be used with PyRIT, where users define a callback function
+         that handles sending a prompt to a target and receiving a response.
+         The _CallbackChatTarget class is a wrapper around the callback function that allows it to be used
+         as a target in the PyRIT framework.
+         For that reason, it merely handles additional functionality such as memory.
+
+         Args:
+             callback (Callable): The callback function that sends a prompt to a target and receives a response.
+             stream (bool, optional): Indicates whether the target supports streaming. Defaults to False.
+         """
+         PromptChatTarget.__init__(self)
+         self._callback = callback
+         self._stream = stream
+
+     async def send_prompt_async(self, *, prompt_request: PromptRequestResponse) -> PromptRequestResponse:
+         self._validate_request(prompt_request=prompt_request)
+         request = prompt_request.request_pieces[0]
+
+         messages = self._memory.get_chat_messages_with_conversation_id(conversation_id=request.conversation_id)
+
+         messages.append(request.to_chat_message())
+
+         logger.info(f"Sending the following prompt to the prompt target: {request}")
+
+         # Extract context from request labels if available.
+         # The context is stored in memory labels when the prompt is sent by the orchestrator.
+         context_dict = {}
+         if hasattr(request, "labels") and request.labels and "context" in request.labels:
+             context_data = request.labels["context"]
+             if context_data and isinstance(context_data, dict):
+                 # context_data is always a dict with a 'contexts' list.
+                 # Each context can have its own context_type and tool_name.
+                 contexts = context_data.get("contexts", [])
+
+                 # Build context_dict to pass to the callback
+                 context_dict = {"contexts": contexts}
+
+                 # Check if any context has agent-specific fields for logging
+                 has_agent_fields = any(
+                     isinstance(ctx, dict)
+                     and ("context_type" in ctx and "tool_name" in ctx and ctx["tool_name"] is not None)
+                     for ctx in contexts
+                 )
+
+                 if has_agent_fields:
+                     tool_names = [
+                         ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                     ]
+                     logger.debug(f"Extracted agent context: {len(contexts)} context source(s), tool_names={tool_names}")
+                 else:
+                     logger.debug(f"Extracted model context: {len(contexts)} context source(s)")
+
+         # The callback response contains "messages", "stream", "session_state", and "context"
+         response = await self._callback(messages=messages, stream=self._stream, session_state=None, context=context_dict)  # type: ignore
+
+         # Store token_usage before processing tuple
+         token_usage = None
+         if isinstance(response, dict) and "token_usage" in response:
+             token_usage = response["token_usage"]
+
+         if isinstance(response, tuple):
+             response, tool_output = response
+             request.labels["tool_calls"] = tool_output
+             # Check for token_usage in the response dict from the tuple
+             if isinstance(response, dict) and "token_usage" in response:
+                 token_usage = response["token_usage"]
+
+         response_text = response["messages"][-1]["content"]
+
+         response_entry = construct_response_from_request(request=request, response_text_pieces=[response_text])
+
+         # Add token_usage to the response entry's labels (not the request)
+         if token_usage:
+             response_entry.request_pieces[0].labels["token_usage"] = token_usage
+             logger.debug(f"Captured token usage from callback: {token_usage}")
+
+         logger.info(f"Received the following response from the prompt target: {response_text}")
+         return response_entry
+
+     def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
+         if len(prompt_request.request_pieces) != 1:
+             raise ValueError("This target only supports a single prompt request piece.")
+
+         if prompt_request.request_pieces[0].converted_value_data_type != "text":
+             raise ValueError("This target only supports text prompt input.")
+
+     def is_json_response_supported(self) -> bool:
+         """Indicates whether this target supports JSON response format."""
+         return False
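
As send_prompt_async shows, the only hard requirement on the callback is that it return a dict whose "messages" list ends in an entry with "content" (a tuple of that dict plus tool output, and a "token_usage" key, are both optional). A minimal callback sketch, assuming pyrit is installed and its memory is configured; the stubbed reply stands in for a real model call:

```python
from typing import Any, Dict, List, Optional

from azure.ai.evaluation.red_team._callback_chat_target import _CallbackChatTarget


async def my_callback(
    messages: List[Dict],
    stream: bool = False,
    session_state: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict:
    # The last message is the prompt just sent; entries may be PyRIT chat
    # message objects rather than dicts, so handle both shapes.
    last = messages[-1] if messages else None
    prompt = last["content"] if isinstance(last, dict) else getattr(last, "content", "")
    reply = f"Stubbed response to: {prompt}"  # placeholder for a real model call

    # Only response["messages"][-1]["content"] is read by send_prompt_async.
    return {
        "messages": [{"role": "assistant", "content": reply}],
        "stream": stream,
        "session_state": session_state,
        "context": context,
    }


target = _CallbackChatTarget(callback=my_callback)
```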