azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (277)
  1. azure/ai/evaluation/__init__.py +85 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +147 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +87 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
  155. azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
  156. azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
  157. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  158. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  159. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  160. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  161. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  162. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  163. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  165. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
  264. azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/red_team/_orchestrator_manager.py (new file)
@@ -0,0 +1,803 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ """
+ Orchestrator management module for Red Team Agent.
+
+ This module handles PyRIT orchestrator initialization, execution, and management
+ for different attack strategies including single-turn, multi-turn, and crescendo attacks.
+ """
+
+ import asyncio
+ import math
+ import os
+ import uuid
+ from datetime import datetime
+ from typing import Dict, List, Optional, Union, Callable
+ from tqdm import tqdm
+
+ # PyRIT imports
+ from pyrit.orchestrator.single_turn.prompt_sending_orchestrator import PromptSendingOrchestrator
+ from pyrit.orchestrator.multi_turn.red_teaming_orchestrator import RedTeamingOrchestrator
+ from pyrit.orchestrator.multi_turn.crescendo_orchestrator import CrescendoOrchestrator
+ from pyrit.orchestrator import Orchestrator
+ from pyrit.prompt_converter import PromptConverter
+ from pyrit.prompt_target import PromptChatTarget
+
+ # Local imports
+ from ._callback_chat_target import _CallbackChatTarget
+
+ # Retry imports
+ import httpx
+ import httpcore
+ import tenacity
+ from tenacity import retry
+
+ # Local imports
+ from ._attack_strategy import AttackStrategy
+ from ._attack_objective_generator import RiskCategory
+ from ._utils._rai_service_target import AzureRAIServiceTarget
+ from ._utils._rai_service_true_false_scorer import AzureRAIServiceTrueFalseScorer
+ from ._utils._rai_service_eval_chat_target import RAIServiceEvalChatTarget
+ from ._utils.constants import DATA_EXT, TASK_STATUS
+ from ._utils.logging_utils import log_strategy_start, log_error
+ from ._utils.formatting_utils import write_pyrit_outputs_to_file
+
+
+ def network_retry_decorator(retry_config, logger, strategy_name, risk_category_name, prompt_idx=None):
+     """Create a reusable retry decorator for network operations.
+
+     :param retry_config: Retry configuration dictionary
+     :param logger: Logger instance for logging warnings
+     :param strategy_name: Name of the attack strategy
+     :param risk_category_name: Name of the risk category
+     :param prompt_idx: Optional prompt index for detailed logging
+     :return: Configured retry decorator
+     """
+
+     def decorator(func):
+         @retry(**retry_config["network_retry"])
+         async def wrapper(*args, **kwargs):
+             try:
+                 return await func(*args, **kwargs)
+             except (
+                 httpx.ConnectTimeout,
+                 httpx.ReadTimeout,
+                 httpx.ConnectError,
+                 httpx.HTTPError,
+                 ConnectionError,
+                 TimeoutError,
+                 OSError,
+                 asyncio.TimeoutError,
+                 httpcore.ReadTimeout,
+                 httpx.HTTPStatusError,
+             ) as e:
+                 prompt_detail = f" for prompt {prompt_idx}" if prompt_idx is not None else ""
+                 logger.warning(
+                     f"Network error{prompt_detail} for {strategy_name}/{risk_category_name}: {type(e).__name__}: {str(e)}"
+                 )
+                 await asyncio.sleep(2)
+                 raise
+
+         return wrapper
+
+     return decorator
+
+
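As an aside, a minimal sketch of how this decorator can be driven (illustrative only, not part of the diff): the "network_retry" key and the tenacity keyword arguments follow from the code above, while the concrete policy values and names below are assumptions.

import asyncio
import logging

import httpx
from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential

# Hypothetical retry policy; the SDK defines its real values elsewhere.
retry_config = {
    "network_retry": {
        "retry": retry_if_exception_type((httpx.HTTPError, ConnectionError, TimeoutError)),
        "stop": stop_after_attempt(3),
        "wait": wait_exponential(multiplier=1, max=30),
        "reraise": True,
    }
}

logger = logging.getLogger(__name__)

@network_retry_decorator(retry_config, logger, "base64", "violence", prompt_idx=1)
async def send_once():
    # A network failure raised here is logged and re-raised by the wrapper,
    # then retried by tenacity according to retry_config["network_retry"].
    async with httpx.AsyncClient() as client:
        return await client.get("https://example.com")

# asyncio.run(send_once())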
+ class OrchestratorManager:
+     """Manages PyRIT orchestrators for different attack strategies."""
+
+     def __init__(
+         self,
+         logger,
+         generated_rai_client,
+         credential,
+         azure_ai_project,
+         one_dp_project,
+         retry_config,
+         scan_output_dir=None,
+         red_team=None,
+     ):
+         """Initialize the orchestrator manager.
+
+         :param logger: Logger instance for logging
+         :param generated_rai_client: RAI client for service interactions
+         :param credential: Authentication credential
+         :param azure_ai_project: Azure AI project configuration
+         :param one_dp_project: Whether this is a OneDP project
+         :param retry_config: Retry configuration for network errors
+         :param scan_output_dir: Directory for scan outputs
+         :param red_team: Reference to RedTeam instance for accessing prompt mappings
+         """
+         self.logger = logger
+         self.generated_rai_client = generated_rai_client
+         self.credential = credential
+         self.azure_ai_project = azure_ai_project
+         self._one_dp_project = one_dp_project
+         self.retry_config = retry_config
+         self.scan_output_dir = scan_output_dir
+         self.red_team = red_team
+
+     def _calculate_timeout(self, base_timeout: int, orchestrator_type: str) -> int:
+         """Calculate appropriate timeout based on orchestrator type.
+
+         Multi-turn and crescendo orchestrators need more generous timeouts due to their
+         iterative nature and multiple API calls per prompt.
+
+         :param base_timeout: Base timeout value in seconds
+         :param orchestrator_type: Type of orchestrator ('single', 'multi_turn', 'crescendo')
+         :return: Calculated timeout in seconds
+         """
+         timeout_multipliers = {
+             "single": 1.0,  # Standard timeout for single-turn
+             "multi_turn": 3.0,  # 3x timeout for multi-turn interactions
+             "crescendo": 4.0,  # 4x timeout for crescendo with backtracks
+         }
+
+         multiplier = timeout_multipliers.get(orchestrator_type, 1.0)
+         calculated_timeout = int(base_timeout * multiplier)
+
+         self.logger.debug(
+             f"Calculated timeout for {orchestrator_type} orchestrator: {calculated_timeout}s "
+             f"(base: {base_timeout}s, multiplier: {multiplier}x)"
+         )
+
+         return calculated_timeout
+
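Reading the multiplier table concretely, with the default 120-second base timeout:

# base_timeout = 120
# "single"     -> int(120 * 1.0) = 120 s
# "multi_turn" -> int(120 * 3.0) = 360 s
# "crescendo"  -> int(120 * 4.0) = 480 s
# Unknown orchestrator types fall back to the 1.0 multiplier.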
+     def get_orchestrator_for_attack_strategy(
+         self, attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
+     ) -> Callable:
+         """Get appropriate orchestrator function for the specified attack strategy.
+
+         :param attack_strategy: Attack strategy to get orchestrator for
+         :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
+         :return: Callable orchestrator function
+         :rtype: Callable
+         """
+         if isinstance(attack_strategy, list):
+             if AttackStrategy.MultiTurn in attack_strategy or AttackStrategy.Crescendo in attack_strategy:
+                 self.logger.error("MultiTurn and Crescendo strategies are not supported in composed attacks.")
+                 raise ValueError("MultiTurn and Crescendo strategies are not supported in composed attacks.")
+         elif AttackStrategy.MultiTurn == attack_strategy:
+             return self._multi_turn_orchestrator
+         elif AttackStrategy.Crescendo == attack_strategy:
+             return self._crescendo_orchestrator
+         return self._prompt_sending_orchestrator
+
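A short sketch of the dispatch above (illustrative only: the constructor stand-ins are placeholders, and `Base64`/`Flip` stand in for any strategies other than MultiTurn/Crescendo):

import logging

from azure.ai.evaluation.red_team._attack_strategy import AttackStrategy
from azure.ai.evaluation.red_team._orchestrator_manager import OrchestratorManager

# __init__ only stores its arguments, so None stand-ins suffice here.
manager = OrchestratorManager(
    logger=logging.getLogger("red_team"),
    generated_rai_client=None,
    credential=None,
    azure_ai_project=None,
    one_dp_project=False,
    retry_config={},
)

# Single strategies map directly to their orchestrator coroutine.
fn = manager.get_orchestrator_for_attack_strategy(AttackStrategy.Crescendo)
assert fn == manager._crescendo_orchestrator

# Composed (list) strategies fall through to the single-turn path,
# unless they include MultiTurn or Crescendo, which raises ValueError.
fn = manager.get_orchestrator_for_attack_strategy([AttackStrategy.Base64, AttackStrategy.Flip])
assert fn == manager._prompt_sending_orchestrator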
+     async def _prompt_sending_orchestrator(
+         self,
+         chat_target: PromptChatTarget,
+         all_prompts: List[str],
+         converter: Union[PromptConverter, List[PromptConverter]],
+         *,
+         strategy_name: str = "unknown",
+         risk_category_name: str = "unknown",
+         risk_category: Optional[RiskCategory] = None,
+         timeout: int = 120,
+         red_team_info: Dict = None,
+         task_statuses: Dict = None,
+         prompt_to_context: Dict[str, Union[str, Dict]] = None,
+     ) -> Orchestrator:
+         """Send prompts via the PromptSendingOrchestrator.
+
+         :param chat_target: The target to send prompts to
+         :type chat_target: PromptChatTarget
+         :param all_prompts: List of prompts to process and send
+         :type all_prompts: List[str]
+         :param converter: Prompt converter or list of converters to transform prompts
+         :type converter: Union[PromptConverter, List[PromptConverter]]
+         :param strategy_name: Name of the attack strategy being used
+         :type strategy_name: str
+         :param risk_category_name: Name of the risk category being evaluated
+         :type risk_category_name: str
+         :param risk_category: Risk category being evaluated
+         :type risk_category: Optional[RiskCategory]
+         :param timeout: Timeout in seconds for each prompt
+         :type timeout: int
+         :param red_team_info: Dictionary to store file paths and results
+         :type red_team_info: Dict
+         :param task_statuses: Dictionary to track task statuses
+         :type task_statuses: Dict
+         :param prompt_to_context: Dictionary mapping prompts to their contexts (string or dict format)
+         :type prompt_to_context: Dict[str, Union[str, Dict]]
+         :return: Configured and initialized orchestrator
+         :rtype: Orchestrator
+         """
+         task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
+         if task_statuses:
+             task_statuses[task_key] = TASK_STATUS["RUNNING"]
+
+         log_strategy_start(self.logger, strategy_name, risk_category_name)
+
+         # Create converter list from single converter or list of converters
+         converter_list = (
+             [converter] if converter and isinstance(converter, PromptConverter) else converter if converter else []
+         )
+
+         # Log which converter is being used
+         if converter_list:
+             if isinstance(converter_list, list) and len(converter_list) > 0:
+                 converter_names = [c.__class__.__name__ for c in converter_list if c is not None]
+                 self.logger.debug(f"Using converters: {', '.join(converter_names)}")
+             elif converter is not None:
+                 self.logger.debug(f"Using converter: {converter.__class__.__name__}")
+         else:
+             self.logger.debug("No converters specified")
+
+         # Initialize orchestrator
+         try:
+             orchestrator = PromptSendingOrchestrator(objective_target=chat_target, prompt_converters=converter_list)
+
+             if not all_prompts:
+                 self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{risk_category_name}")
+                 if task_statuses:
+                     task_statuses[task_key] = TASK_STATUS["COMPLETED"]
+                 return orchestrator
+
+             # Initialize output path for memory labelling
+             base_path = str(uuid.uuid4())
+
+             # If scan output directory exists, place the file there
+             if self.scan_output_dir:
+                 output_path = os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
+             else:
+                 output_path = f"{base_path}{DATA_EXT}"
+
+             if red_team_info:
+                 red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
+
+             # Process prompts one at a time like multi-turn and crescendo orchestrators
+             self.logger.debug(f"Processing {len(all_prompts)} prompts for {strategy_name}/{risk_category_name}")
+
+             # Calculate appropriate timeout for single-turn orchestrator
+             calculated_timeout = self._calculate_timeout(timeout, "single")
+
+             for prompt_idx, prompt in enumerate(all_prompts):
+                 prompt_start_time = datetime.now()
+                 self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
+
+                 # Get context for this prompt
+                 context_data = prompt_to_context.get(prompt, {}) if prompt_to_context else {}
+
+                 # Normalize context_data: handle both string (legacy) and dict formats
+                 # If context_data is a string, convert it to the expected dict format
+                 if isinstance(context_data, str):
+                     context_data = {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
+
+                 # context_data is now always a dict with a 'contexts' list
+                 # Each item in contexts is a dict with 'content' key
+                 # context_type and tool_name can be present per-context
+                 contexts = context_data.get("contexts", [])
+
+                 # Check if any context has agent-specific fields (context_type, tool_name)
+                 has_agent_fields = any(
+                     isinstance(ctx, dict)
+                     and ("context_type" in ctx and "tool_name" in ctx and ctx["tool_name"] is not None)
+                     for ctx in contexts
+                 )
+
+                 # Build context_dict to pass via memory labels
+                 context_dict = {"contexts": contexts}
+
+                 # Get risk_sub_type for this prompt if it exists
+                 risk_sub_type = (
+                     self.red_team.prompt_to_risk_subtype.get(prompt)
+                     if self.red_team and hasattr(self.red_team, "prompt_to_risk_subtype")
+                     else None
+                 )
+
+                 try:
+                     # Create retry-enabled function using the reusable decorator
+                     @network_retry_decorator(
+                         self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
+                     )
+                     async def send_prompt_with_retry():
+                         memory_labels = {
+                             "risk_strategy_path": output_path,
+                             "batch": prompt_idx + 1,
+                             "context": context_dict,
+                         }
+                         if risk_sub_type:
+                             memory_labels["risk_sub_type"] = risk_sub_type
+                         return await asyncio.wait_for(
+                             orchestrator.send_prompts_async(
+                                 prompt_list=[prompt],
+                                 memory_labels=memory_labels,
+                             ),
+                             timeout=calculated_timeout,
+                         )
+
+                     # Execute the retry-enabled function
+                     await send_prompt_with_retry()
+                     prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
+                     self.logger.debug(
+                         f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds"
+                     )
+
+                     # Print progress to console
+                     if prompt_idx < len(all_prompts) - 1:  # Don't print for the last prompt
+                         print(
+                             f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{len(all_prompts)}"
+                         )
+
+                 except (asyncio.TimeoutError, tenacity.RetryError):
+                     self.logger.warning(
+                         f"Prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with remaining prompts"
+                     )
+                     print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Prompt {prompt_idx+1}")
+                     # Set task status to TIMEOUT for this specific prompt
+                     batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
+                     if task_statuses:
+                         task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                     if red_team_info:
+                         red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                     continue
+                 except Exception as e:
+                     log_error(
+                         self.logger,
+                         f"Error processing prompt {prompt_idx+1}",
+                         e,
+                         f"{strategy_name}/{risk_category_name}",
+                     )
+                     if red_team_info:
+                         red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                     continue
+
+             if task_statuses:
+                 task_statuses[task_key] = TASK_STATUS["COMPLETED"]
+             return orchestrator
+
+         except Exception as e:
+             log_error(
+                 self.logger,
+                 "Failed to initialize orchestrator",
+                 e,
+                 f"{strategy_name}/{risk_category_name}",
+             )
+             if task_statuses:
+                 task_statuses[task_key] = TASK_STATUS["FAILED"]
+             raise
+
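To make the context normalization above concrete, a sketch of the two accepted `prompt_to_context` shapes (the prompts and field values are illustrative):

prompt_to_context = {
    # Legacy string form...
    "What does the Q3 report say?": "Q3 revenue grew 12%.",
    # ...and the structured form, optionally carrying agent-specific fields.
    "Summarize the tool output.": {
        "contexts": [
            {"content": "Tool returned 42 rows.", "context_type": "tool_output", "tool_name": "sql_query"}
        ]
    },
}

# The string form is normalized to {"contexts": [{"content": "Q3 revenue grew 12%."}]},
# and either way the loop passes memory_labels["context"] = {"contexts": [...]}.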
+     async def _multi_turn_orchestrator(
+         self,
+         chat_target: PromptChatTarget,
+         all_prompts: List[str],
+         converter: Union[PromptConverter, List[PromptConverter]],
+         *,
+         strategy_name: str = "unknown",
+         risk_category_name: str = "unknown",
+         risk_category: Optional[RiskCategory] = None,
+         timeout: int = 120,
+         red_team_info: Dict = None,
+         task_statuses: Dict = None,
+         prompt_to_context: Dict[str, Union[str, Dict]] = None,
+     ) -> Orchestrator:
+         """Send prompts via the RedTeamingOrchestrator (multi-turn orchestrator).
+
+         :param chat_target: The target to send prompts to
+         :type chat_target: PromptChatTarget
+         :param all_prompts: List of prompts to process and send
+         :type all_prompts: List[str]
+         :param converter: Prompt converter or list of converters to transform prompts
+         :type converter: Union[PromptConverter, List[PromptConverter]]
+         :param strategy_name: Name of the attack strategy being used
+         :type strategy_name: str
+         :param risk_category_name: Name of the risk category being evaluated
+         :type risk_category_name: str
+         :param risk_category: Risk category being evaluated
+         :type risk_category: Optional[RiskCategory]
+         :param timeout: Timeout in seconds for each prompt
+         :type timeout: int
+         :param red_team_info: Dictionary to store file paths and results
+         :type red_team_info: Dict
+         :param task_statuses: Dictionary to track task statuses
+         :type task_statuses: Dict
+         :param prompt_to_context: Dictionary mapping prompts to their contexts (string or dict format)
+         :type prompt_to_context: Dict[str, Union[str, Dict]]
+         :return: Configured and initialized orchestrator
+         :rtype: Orchestrator
+         """
+         max_turns = 5  # Set a default max turns value
+         task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
+         if task_statuses:
+             task_statuses[task_key] = TASK_STATUS["RUNNING"]
+
+         log_strategy_start(self.logger, strategy_name, risk_category_name)
+         converter_list = []
+         # Create converter list from single converter or list of converters
+         if converter and isinstance(converter, PromptConverter):
+             converter_list = [converter]
+         elif converter and isinstance(converter, list):
+             # Filter out None values from the converter list
+             converter_list = [c for c in converter if c is not None]
+
+         # Log which converter is being used
+         if converter_list:
+             if isinstance(converter_list, list) and len(converter_list) > 0:
+                 converter_names = [c.__class__.__name__ for c in converter_list if c is not None]
+                 self.logger.debug(f"Using converters: {', '.join(converter_names)}")
+             elif converter is not None:
+                 self.logger.debug(f"Using converter: {converter.__class__.__name__}")
+         else:
+             self.logger.debug("No converters specified")
+
+         # Initialize output path for memory labelling
+         base_path = str(uuid.uuid4())
+
+         # If scan output directory exists, place the file there
+         if self.scan_output_dir:
+             # Ensure the directory exists
+             os.makedirs(self.scan_output_dir, exist_ok=True)
+             output_path = os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
+         else:
+             output_path = f"{base_path}{DATA_EXT}"
+
+         if red_team_info:
+             red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
+
+         # Calculate appropriate timeout for multi-turn orchestrator
+         calculated_timeout = self._calculate_timeout(timeout, "multi_turn")
+
+         for prompt_idx, prompt in enumerate(all_prompts):
+             prompt_start_time = datetime.now()
+             self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
+
+             # Get context for this prompt
+             context_data = prompt_to_context.get(prompt, {}) if prompt_to_context else {}
+
+             # Normalize context_data: handle both string (legacy) and dict formats
+             # If context_data is a string, convert it to the expected dict format
+             if isinstance(context_data, str):
+                 context_data = {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
+
+             # context_data is now always a dict with a 'contexts' list
+             # Each item in contexts is a dict with 'content' key
+             # context_type and tool_name can be present per-context
+             contexts = context_data.get("contexts", [])
+
+             # Check if any context has agent-specific fields (context_type, tool_name)
+             has_agent_fields = any(
+                 isinstance(ctx, dict) and ("context_type" in ctx or "tool_name" in ctx) for ctx in contexts
+             )
+
+             # Build context_dict to pass via memory labels
+             context_dict = {"contexts": contexts}
+
+             # Get risk_sub_type for this prompt if it exists
+             risk_sub_type = (
+                 self.red_team.prompt_to_risk_subtype.get(prompt)
+                 if self.red_team and hasattr(self.red_team, "prompt_to_risk_subtype")
+                 else None
+             )
+
+             # For backwards compatibility with scoring, extract string context
+             # This is used by AzureRAIServiceTrueFalseScorer which expects a string
+             context_string = ""
+             if contexts:
+                 context_string = "\n".join(
+                     ctx.get("content", "") if isinstance(ctx, dict) else str(ctx) for ctx in contexts
+                 )
+
+             try:
+                 azure_rai_service_scorer = AzureRAIServiceTrueFalseScorer(
+                     client=self.generated_rai_client,
+                     api_version=None,
+                     model="gpt-4",
+                     logger=self.logger,
+                     credential=self.credential,
+                     risk_category=risk_category,
+                     azure_ai_project=self.azure_ai_project,
+                     context=context_string,
+                 )
+
+                 azure_rai_service_target = AzureRAIServiceTarget(
+                     client=self.generated_rai_client,
+                     api_version=None,
+                     model="gpt-4",
+                     prompt_template_key="orchestrators/red_teaming/text_generation.yaml",
+                     objective=prompt,
+                     logger=self.logger,
+                     is_one_dp_project=self._one_dp_project,
+                 )
+
+                 orchestrator = RedTeamingOrchestrator(
+                     objective_target=chat_target,
+                     adversarial_chat=azure_rai_service_target,
+                     max_turns=max_turns,
+                     prompt_converters=converter_list,
+                     objective_scorer=azure_rai_service_scorer,
+                     use_score_as_feedback=False,
+                 )
+
+                 try:
+                     # Create retry-enabled function using the reusable decorator
+                     @network_retry_decorator(
+                         self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
+                     )
+                     async def send_prompt_with_retry():
+                         memory_labels = {
+                             "risk_strategy_path": output_path,
+                             "batch": prompt_idx + 1,
+                             "context": context_dict,
+                         }
+                         if risk_sub_type:
+                             memory_labels["risk_sub_type"] = risk_sub_type
+                         return await asyncio.wait_for(
+                             orchestrator.run_attack_async(
+                                 objective=prompt,
+                                 memory_labels=memory_labels,
+                             ),
+                             timeout=calculated_timeout,
+                         )
+
+                     # Execute the retry-enabled function
+                     await send_prompt_with_retry()
+                     prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
+                     self.logger.debug(
+                         f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds"
+                     )
+
+                     # Write outputs to file after each prompt is processed
+                     write_pyrit_outputs_to_file(
+                         output_path=output_path,
+                         logger=self.logger,
+                         prompt_to_context=prompt_to_context,
+                     )
+
+                     # Print progress to console
+                     if prompt_idx < len(all_prompts) - 1:  # Don't print for the last prompt
+                         print(
+                             f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{len(all_prompts)}"
+                         )
+
+                 except (asyncio.TimeoutError, tenacity.RetryError):
+                     self.logger.warning(
+                         f"Batch {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with partial results"
+                     )
+                     print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1}")
+                     # Set task status to TIMEOUT
+                     batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
+                     if task_statuses:
+                         task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                     if red_team_info:
+                         red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                     continue
+                 except Exception as e:
+                     log_error(
+                         self.logger,
+                         f"Error processing prompt {prompt_idx+1}",
+                         e,
+                         f"{strategy_name}/{risk_category_name}",
+                     )
+                     if red_team_info:
+                         red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                     continue
+             except Exception as e:
+                 log_error(
+                     self.logger,
+                     "Failed to initialize orchestrator",
+                     e,
+                     f"{strategy_name}/{risk_category_name}",
+                 )
+                 if task_statuses:
+                     task_statuses[task_key] = TASK_STATUS["FAILED"]
+                 raise
+         if task_statuses:
+             task_statuses[task_key] = TASK_STATUS["COMPLETED"]
+         return orchestrator
+
+     async def _crescendo_orchestrator(
+         self,
+         chat_target: PromptChatTarget,
+         all_prompts: List[str],
+         converter: Union[PromptConverter, List[PromptConverter]],
+         *,
+         strategy_name: str = "unknown",
+         risk_category_name: str = "unknown",
+         risk_category: Optional[RiskCategory] = None,
+         timeout: int = 120,
+         red_team_info: Dict = None,
+         task_statuses: Dict = None,
+         prompt_to_context: Dict[str, Union[str, Dict]] = None,
+     ) -> Orchestrator:
+         """Send prompts via the CrescendoOrchestrator with optimized performance.
+
+         :param chat_target: The target to send prompts to
+         :type chat_target: PromptChatTarget
+         :param all_prompts: List of prompts to process and send
+         :type all_prompts: List[str]
+         :param converter: Prompt converter or list of converters to transform prompts
+         :type converter: Union[PromptConverter, List[PromptConverter]]
+         :param strategy_name: Name of the attack strategy being used
+         :type strategy_name: str
+         :param risk_category_name: Name of the risk category being evaluated
+         :type risk_category_name: str
+         :param risk_category: Risk category being evaluated
+         :type risk_category: Optional[RiskCategory]
+         :param timeout: Timeout in seconds for each prompt
+         :type timeout: int
+         :param red_team_info: Dictionary to store file paths and results
+         :type red_team_info: Dict
+         :param task_statuses: Dictionary to track task statuses
+         :type task_statuses: Dict
+         :param prompt_to_context: Dictionary mapping prompts to their contexts (string or dict format)
+         :type prompt_to_context: Dict[str, Union[str, Dict]]
+         :return: Configured and initialized orchestrator
+         :rtype: Orchestrator
+         """
+         max_turns = 10  # Set a default max turns value
+         max_backtracks = 5
+         task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
+         if task_statuses:
+             task_statuses[task_key] = TASK_STATUS["RUNNING"]
+
+         log_strategy_start(self.logger, strategy_name, risk_category_name)
+
+         # Initialize output path for memory labelling
+         base_path = str(uuid.uuid4())
+
+         # If scan output directory exists, place the file there
+         if self.scan_output_dir:
+             output_path = os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
+         else:
+             output_path = f"{base_path}{DATA_EXT}"
+
+         if red_team_info:
+             red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
+
+         # Calculate appropriate timeout for crescendo orchestrator
+         calculated_timeout = self._calculate_timeout(timeout, "crescendo")
+
+         for prompt_idx, prompt in enumerate(all_prompts):
+             prompt_start_time = datetime.now()
+             self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
+
+             # Get context for this prompt
+             context_data = prompt_to_context.get(prompt, {}) if prompt_to_context else {}
+
+             # Normalize context_data: handle both string (legacy) and dict formats
+             # If context_data is a string, convert it to the expected dict format
+             if isinstance(context_data, str):
+                 context_data = {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
+
+             # context_data is now always a dict with a 'contexts' list
+             # Each item in contexts is a dict with 'content' key
+             # context_type and tool_name can be present per-context
+             contexts = context_data.get("contexts", [])
+
+             # Check if any context has agent-specific fields (context_type, tool_name)
+             has_agent_fields = any(
+                 isinstance(ctx, dict)
+                 and ("context_type" in ctx and "tool_name" in ctx and ctx["tool_name"] is not None)
+                 for ctx in contexts
+             )
+
+             # Build context_dict to pass via memory labels
+             context_dict = {"contexts": contexts}
+
+             # Get risk_sub_type for this prompt if it exists
+             risk_sub_type = (
+                 self.red_team.prompt_to_risk_subtype.get(prompt)
+                 if self.red_team and hasattr(self.red_team, "prompt_to_risk_subtype")
+                 else None
+             )
+
+             # For backwards compatibility with scoring, extract string context
+             # This is used by AzureRAIServiceTrueFalseScorer and RAIServiceEvalChatTarget which expect a string
+             context_string = ""
+             if contexts:
+                 context_string = "\n".join(
+                     ctx.get("content", "") if isinstance(ctx, dict) else str(ctx) for ctx in contexts
+                 )
+
+             try:
+                 red_llm_scoring_target = RAIServiceEvalChatTarget(
+                     logger=self.logger,
+                     credential=self.credential,
+                     risk_category=risk_category,
+                     azure_ai_project=self.azure_ai_project,
+                     context=context_string,
+                 )
+
+                 azure_rai_service_target = AzureRAIServiceTarget(
+                     client=self.generated_rai_client,
+                     api_version=None,
+                     model="gpt-4",
+                     prompt_template_key="orchestrators/crescendo/crescendo_variant_1.yaml",
+                     objective=prompt,
+                     logger=self.logger,
+                     crescendo_format=True,
+                     is_one_dp_project=self._one_dp_project,
+                 )
+
+                 orchestrator = CrescendoOrchestrator(
+                     objective_target=chat_target,
+                     adversarial_chat=azure_rai_service_target,
+                     max_turns=max_turns,
+                     scoring_target=red_llm_scoring_target,
+                     max_backtracks=max_backtracks,
+                 )
+
+                 orchestrator._objective_scorer = AzureRAIServiceTrueFalseScorer(
+                     client=self.generated_rai_client,
+                     api_version=None,
+                     model="gpt-4",
+                     logger=self.logger,
+                     credential=self.credential,
+                     risk_category=risk_category,
+                     azure_ai_project=self.azure_ai_project,
+                     context=context_string,
+                 )
+
+                 try:
+                     # Create retry-enabled function using the reusable decorator
+                     @network_retry_decorator(
+                         self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
+                     )
+                     async def send_prompt_with_retry():
+                         memory_labels = {
+                             "risk_strategy_path": output_path,
+                             "batch": prompt_idx + 1,
+                             "context": context_dict,
+                         }
+                         if risk_sub_type:
+                             memory_labels["risk_sub_type"] = risk_sub_type
+                         return await asyncio.wait_for(
+                             orchestrator.run_attack_async(
+                                 objective=prompt,
+                                 memory_labels=memory_labels,
+                             ),
+                             timeout=calculated_timeout,
+                         )
+
+                     # Execute the retry-enabled function
+                     await send_prompt_with_retry()
+                     prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
+                     self.logger.debug(
+                         f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds"
+                     )
+
+                     # Write outputs to file after each prompt is processed
+                     write_pyrit_outputs_to_file(
+                         output_path=output_path,
+                         logger=self.logger,
+                         prompt_to_context=prompt_to_context,
+                     )
+
+                     # Print progress to console
+                     if prompt_idx < len(all_prompts) - 1:  # Don't print for the last prompt
+                         print(
+                             f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{len(all_prompts)}"
+                         )
+
+                 except (asyncio.TimeoutError, tenacity.RetryError):
+                     self.logger.warning(
+                         f"Batch {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with partial results"
+                     )
+                     print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1}")
+                     # Set task status to TIMEOUT
+                     batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
+                     if task_statuses:
+                         task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                     if red_team_info:
+                         red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                     continue
+                 except Exception as e:
+                     log_error(
+                         self.logger,
+                         f"Error processing prompt {prompt_idx+1}",
+                         e,
+                         f"{strategy_name}/{risk_category_name}",
+                     )
+                     if red_team_info:
+                         red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                     continue
+             except Exception as e:
+                 log_error(
+                     self.logger,
+                     "Failed to initialize orchestrator",
+                     e,
+                     f"{strategy_name}/{risk_category_name}",
+                 )
+                 if task_statuses:
+                     task_statuses[task_key] = TASK_STATUS["FAILED"]
+                 raise
+         if task_statuses:
+             task_statuses[task_key] = TASK_STATUS["COMPLETED"]
+         return orchestrator
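Finally, a hedged sketch of how this new module is wired up. `OrchestratorManager.__init__` only stores its arguments, so the construction below runs as-is once the package and its pyrit dependency are installed; the `None` stand-ins, the empty retry policy, and the pre-seeded bookkeeping dicts are assumptions that mirror the shapes the methods index into, not the SDK's real setup.

import logging

from azure.ai.evaluation.red_team._attack_strategy import AttackStrategy
from azure.ai.evaluation.red_team._orchestrator_manager import OrchestratorManager

manager = OrchestratorManager(
    logger=logging.getLogger("red_team"),
    generated_rai_client=None,  # stand-in: a generated RAI client in a real scan
    credential=None,            # stand-in: e.g. DefaultAzureCredential()
    azure_ai_project=None,      # stand-in: the Azure AI project configuration
    one_dp_project=False,
    retry_config={"network_retry": {}},  # tenacity defaults; real scans set stop/wait
)

# Dispatch picks the coroutine that matches the strategy.
run_strategy = manager.get_orchestrator_for_attack_strategy(AttackStrategy.MultiTurn)

# Bookkeeping dicts are pre-seeded per strategy/risk category by the caller:
red_team_info = {"multi_turn": {"violence": {}}}
task_statuses = {}

# A real scan then awaits, roughly:
# await run_strategy(
#     chat_target=some_prompt_chat_target,  # a pyrit PromptChatTarget
#     all_prompts=["<objective 1>", "<objective 2>"],
#     converter=[],
#     strategy_name="multi_turn",
#     risk_category_name="violence",
#     timeout=120,  # scaled to 360 s internally by the 3x multiplier
#     red_team_info=red_team_info,
#     task_statuses=task_statuses,
# )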