azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff reflects the changes between two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (142)
  1. azure/ai/evaluation/__init__.py +27 -1
  2. azure/ai/evaluation/_azure/_models.py +6 -6
  3. azure/ai/evaluation/_common/constants.py +6 -2
  4. azure/ai/evaluation/_common/rai_service.py +39 -5
  5. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  6. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  7. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  8. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  9. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  10. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  11. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  12. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  13. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  14. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  15. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  16. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  17. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  18. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  19. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  20. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  21. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  22. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  23. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  24. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
  25. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  26. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  27. azure/ai/evaluation/_common/utils.py +23 -3
  28. azure/ai/evaluation/_constants.py +7 -0
  29. azure/ai/evaluation/_converters/__init__.py +3 -0
  30. azure/ai/evaluation/_converters/_ai_services.py +804 -0
  31. azure/ai/evaluation/_converters/_models.py +302 -0
  32. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
  33. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
  34. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  35. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  36. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
  37. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
  38. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
  39. azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
  40. azure/ai/evaluation/_evaluate/_evaluate.py +109 -64
  41. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
  42. azure/ai/evaluation/_evaluate/_utils.py +3 -3
  43. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
  44. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  45. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
  46. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
  47. azure/ai/evaluation/_evaluators/_common/_base_eval.py +44 -4
  48. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +4 -2
  49. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +44 -5
  50. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
  51. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
  52. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
  53. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
  54. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
  55. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
  56. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
  57. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
  58. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
  59. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +22 -4
  60. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  61. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
  62. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
  63. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
  64. azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
  65. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
  66. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  67. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +158 -0
  68. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
  69. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
  70. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
  72. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
  73. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  74. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
  75. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
  76. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  77. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
  78. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
  79. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  80. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
  81. azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
  82. azure/ai/evaluation/_exceptions.py +5 -0
  83. azure/ai/evaluation/_legacy/__init__.py +3 -0
  84. azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
  85. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  86. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  87. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  88. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  89. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  90. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  91. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  92. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  93. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  94. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  95. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  96. azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
  97. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
  98. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  99. azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
  100. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
  101. azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
  102. azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
  103. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  104. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
  105. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  106. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
  107. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
  108. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  109. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  110. azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
  111. azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
  112. azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
  113. azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
  114. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  115. azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
  116. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  117. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
  118. azure/ai/evaluation/_version.py +1 -1
  119. azure/ai/evaluation/red_team/__init__.py +19 -0
  120. azure/ai/evaluation/red_team/_attack_objective_generator.py +195 -0
  121. azure/ai/evaluation/red_team/_attack_strategy.py +45 -0
  122. azure/ai/evaluation/red_team/_callback_chat_target.py +74 -0
  123. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  124. azure/ai/evaluation/red_team/_red_team.py +1887 -0
  125. azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  126. azure/ai/evaluation/red_team/_utils/__init__.py +3 -0
  127. azure/ai/evaluation/red_team/_utils/constants.py +65 -0
  128. azure/ai/evaluation/red_team/_utils/formatting_utils.py +165 -0
  129. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  130. azure/ai/evaluation/red_team/_utils/strategy_utils.py +192 -0
  131. azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
  132. azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
  133. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
  134. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
  135. azure/ai/evaluation/simulator/_simulator.py +1 -1
  136. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +80 -15
  137. azure_ai_evaluation-1.5.0.dist-info/RECORD +207 -0
  138. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +1 -1
  139. azure/ai/evaluation/simulator/_tracing.py +0 -89
  140. azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
  141. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
  142. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1887 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ # Third-party imports
5
+ import asyncio
6
+ import inspect
7
+ import math
8
+ import os
9
+ import logging
10
+ import tempfile
11
+ import time
12
+ from datetime import datetime
13
+ from typing import Callable, Dict, List, Optional, Union, cast
14
+ import json
15
+ from pathlib import Path
16
+ import itertools
17
+ import random
18
+ import uuid
19
+ import pandas as pd
20
+ from tqdm import tqdm
21
+
22
+ # Azure AI Evaluation imports
23
+ from azure.ai.evaluation._evaluate._eval_run import EvalRun
24
+ from azure.ai.evaluation._evaluate._utils import _trace_destination_from_project_scope
25
+ from azure.ai.evaluation._model_configurations import AzureAIProject
26
+ from azure.ai.evaluation._constants import EvaluationRunProperties, DefaultOpenEncoding, EVALUATION_PASS_FAIL_MAPPING
27
+ from azure.ai.evaluation._evaluate._utils import _get_ai_studio_url
28
+ from azure.ai.evaluation._evaluate._utils import extract_workspace_triad_from_trace_provider
29
+ from azure.ai.evaluation._version import VERSION
30
+ from azure.ai.evaluation._azure._clients import LiteMLClient
31
+ from azure.ai.evaluation._evaluate._utils import _write_output
32
+ from azure.ai.evaluation._common._experimental import experimental
33
+ from azure.ai.evaluation._model_configurations import EvaluationResult
34
+ from azure.ai.evaluation.simulator._model_tools import ManagedIdentityAPITokenManager, TokenScope, RAIClient
35
+ from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
36
+ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
37
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
38
+ from azure.ai.evaluation._common.math import list_mean_nan_safe, is_none_or_nan
39
+ from azure.ai.evaluation._common.utils import validate_azure_ai_project
40
+ from azure.ai.evaluation import evaluate
41
+
42
+ # Azure Core imports
43
+ from azure.core.credentials import TokenCredential
44
+
45
+ # Red Teaming imports
46
+ from ._red_team_result import RedTeamResult, RedTeamingScorecard, RedTeamingParameters, ScanResult
47
+ from ._attack_strategy import AttackStrategy
48
+ from ._attack_objective_generator import RiskCategory, _AttackObjectiveGenerator
49
+
50
+ # PyRIT imports
51
+ from pyrit.common import initialize_pyrit, DUCK_DB
52
+ from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget
53
+ from pyrit.models import ChatMessage
54
+ from pyrit.orchestrator.single_turn.prompt_sending_orchestrator import PromptSendingOrchestrator
55
+ from pyrit.orchestrator import Orchestrator
56
+ from pyrit.exceptions import PyritException
57
+ from pyrit.prompt_converter import PromptConverter, MathPromptConverter, Base64Converter, FlipConverter, MorseConverter, AnsiAttackConverter, AsciiArtConverter, AsciiSmugglerConverter, AtbashConverter, BinaryConverter, CaesarConverter, CharacterSpaceConverter, CharSwapGenerator, DiacriticConverter, LeetspeakConverter, UrlConverter, UnicodeSubstitutionConverter, UnicodeConfusableConverter, SuffixAppendConverter, StringJoinConverter, ROT13Converter
58
+
59
+ # Local imports - constants and utilities
60
+ from ._utils.constants import (
61
+ BASELINE_IDENTIFIER, DATA_EXT, RESULTS_EXT,
62
+ ATTACK_STRATEGY_COMPLEXITY_MAP, RISK_CATEGORY_EVALUATOR_MAP,
63
+ INTERNAL_TASK_TIMEOUT, TASK_STATUS
64
+ )
65
+ from ._utils.logging_utils import (
66
+ setup_logger, log_section_header, log_subsection_header,
67
+ log_strategy_start, log_strategy_completion, log_error
68
+ )
69
+
70
+ @experimental
71
+ class RedTeam():
72
+ """
73
+ This class uses various attack strategies to test the robustness of AI models against adversarial inputs.
74
+ It logs the results of these evaluations and provides detailed scorecards summarizing the attack success rates.
75
+
76
+ :param azure_ai_project: The Azure AI project configuration
77
+ :type azure_ai_project: dict
78
+ :param credential: The credential to authenticate with Azure services
79
+ :type credential: TokenCredential
80
+ :param risk_categories: List of risk categories to generate attack objectives for (optional if custom_attack_seed_prompts is provided)
81
+ :type risk_categories: Optional[List[RiskCategory]]
82
+ :param num_objectives: Number of objectives to generate per risk category
83
+ :type num_objectives: int
84
+ :param application_scenario: Description of the application scenario for context
85
+ :type application_scenario: Optional[str]
86
+ :param custom_attack_seed_prompts: Path to a JSON file containing custom attack seed prompts (can be absolute or relative path)
87
+ :type custom_attack_seed_prompts: Optional[str]
88
+ :param output_dir: Directory to store all output files. If None, files are created in the current working directory.
89
+ :type output_dir: Optional[str]
90
+ :param max_parallel_tasks: Maximum number of parallel tasks to run when scanning (default: 5)
91
+ :type max_parallel_tasks: int
92
+ """
93
+ def __init__(
94
+ self,
95
+ azure_ai_project,
96
+ credential,
97
+ *,
98
+ risk_categories: Optional[List[RiskCategory]] = None,
99
+ num_objectives: int = 10,
100
+ application_scenario: Optional[str] = None,
101
+ custom_attack_seed_prompts: Optional[str] = None,
102
+ output_dir=None
103
+ ):
104
+
105
+ self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
106
+ self.credential = credential
107
+ self.output_dir = output_dir
108
+
109
+ # Initialize logger without output directory (will be updated during scan)
110
+ self.logger = setup_logger()
111
+
112
+ self.token_manager = ManagedIdentityAPITokenManager(
113
+ token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
114
+ logger=logging.getLogger("RedTeamLogger"),
115
+ credential=cast(TokenCredential, credential),
116
+ )
117
+
118
+ # Initialize task tracking
119
+ self.task_statuses = {}
120
+ self.total_tasks = 0
121
+ self.completed_tasks = 0
122
+ self.failed_tasks = 0
123
+ self.start_time = None
124
+ self.scan_id = None
125
+ self.scan_output_dir = None
126
+
127
+ self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
128
+ self.generated_rai_client = GeneratedRAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager.get_aad_credential()) #type: ignore
129
+
130
+ # Initialize a cache for attack objectives by risk category and strategy
131
+ self.attack_objectives = {}
132
+
133
+ # keep track of data and eval result file names
134
+ self.red_team_info = {}
135
+
136
+ initialize_pyrit(memory_db_type=DUCK_DB)
137
+
138
+ self.attack_objective_generator = _AttackObjectiveGenerator(risk_categories=risk_categories, num_objectives=num_objectives, application_scenario=application_scenario, custom_attack_seed_prompts=custom_attack_seed_prompts)
139
+
140
+ self.logger.debug("RedTeam initialized successfully")
141
+
142
+
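# Illustrative sketch (editor's addition, not part of the diffed package source):
# a minimal construction of RedTeam based on the constructor documented above.
# The import path assumes RedTeam and RiskCategory are re-exported from the new
# azure.ai.evaluation.red_team package, and RiskCategory.Violence is an assumed
# enum member name; the project values are placeholders.
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.red_team import RedTeam, RiskCategory  # assumed export path

red_team = RedTeam(
    azure_ai_project={
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    },
    credential=DefaultAzureCredential(),
    risk_categories=[RiskCategory.Violence],  # assumed member name
    num_objectives=5,
    application_scenario="An internal customer-support chatbot",
    output_dir="./redteam_outputs",
)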
143
+ def _start_redteam_mlflow_run(
144
+ self,
145
+ azure_ai_project: Optional[AzureAIProject] = None,
146
+ run_name: Optional[str] = None
147
+ ) -> EvalRun:
148
+ """Start an MLFlow run for the Red Team Agent evaluation.
149
+
150
+ :param azure_ai_project: Azure AI project details for logging
151
+ :type azure_ai_project: Optional[~azure.ai.evaluation.AzureAIProject]
152
+ :param run_name: Optional name for the MLFlow run
153
+ :type run_name: Optional[str]
154
+ :return: The MLFlow run object
155
+ :rtype: ~azure.ai.evaluation._evaluate._eval_run.EvalRun
156
+ """
157
+ if not azure_ai_project:
158
+ log_error(self.logger, "No azure_ai_project provided, cannot start MLFlow run")
159
+ raise EvaluationException(
160
+ message="No azure_ai_project provided",
161
+ blame=ErrorBlame.USER_ERROR,
162
+ category=ErrorCategory.MISSING_FIELD,
163
+ target=ErrorTarget.RED_TEAM
164
+ )
165
+
166
+ trace_destination = _trace_destination_from_project_scope(azure_ai_project)
167
+ if not trace_destination:
168
+ self.logger.warning("Could not determine trace destination from project scope")
169
+ raise EvaluationException(
170
+ message="Could not determine trace destination",
171
+ blame=ErrorBlame.SYSTEM_ERROR,
172
+ category=ErrorCategory.UNKNOWN,
173
+ target=ErrorTarget.RED_TEAM
174
+ )
175
+
176
+ ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
177
+
178
+ management_client = LiteMLClient(
179
+ subscription_id=ws_triad.subscription_id,
180
+ resource_group=ws_triad.resource_group_name,
181
+ logger=self.logger,
182
+ credential=azure_ai_project.get("credential")
183
+ )
184
+
185
+ tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
186
+
187
+ run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
188
+ self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
189
+
190
+ eval_run = EvalRun(
191
+ run_name=run_display_name,
192
+ tracking_uri=cast(str, tracking_uri),
193
+ subscription_id=ws_triad.subscription_id,
194
+ group_name=ws_triad.resource_group_name,
195
+ workspace_name=ws_triad.workspace_name,
196
+ management_client=management_client, # type: ignore
197
+ )
198
+
199
+ self.trace_destination = trace_destination
200
+ self.logger.debug(f"MLFlow run created successfully with ID: {eval_run}")
201
+
202
+ return eval_run
203
+
204
+
205
+ async def _log_redteam_results_to_mlflow(
206
+ self,
207
+ redteam_output: RedTeamResult,
208
+ eval_run: EvalRun,
209
+ data_only: bool = False,
210
+ ) -> Optional[str]:
211
+ """Log the Red Team Agent results to MLFlow.
212
+
213
+ :param redteam_output: The output from the red team agent evaluation
214
+ :type redteam_output: ~azure.ai.evaluation.RedTeamOutput
215
+ :param eval_run: The MLFlow run object
216
+ :type eval_run: ~azure.ai.evaluation._evaluate._eval_run.EvalRun
217
+ :param data_only: Whether to log only data without evaluation results
218
+ :type data_only: bool
219
+ :return: The URL to the run in Azure AI Studio, if available
220
+ :rtype: Optional[str]
221
+ """
222
+ self.logger.debug(f"Logging results to MLFlow, data_only={data_only}")
223
+ artifact_name = "instance_results.json" if not data_only else "instance_data.json"
224
+
225
+ # If we have a scan output directory, save the results there first
226
+ if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
227
+ artifact_path = os.path.join(self.scan_output_dir, artifact_name)
228
+ self.logger.debug(f"Saving artifact to scan output directory: {artifact_path}")
229
+
230
+ with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
231
+ if data_only:
232
+ # In data_only mode, we write the conversations in conversation/messages format
233
+ f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
234
+ elif redteam_output.scan_result:
235
+ json.dump(redteam_output.scan_result, f)
236
+
237
+ eval_info_name = "redteam_info.json"
238
+ eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
239
+ self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
240
+ with open (eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
241
+ # Remove evaluation_result from red_team_info before logging
242
+ red_team_info_logged = {}
243
+ for strategy, harms_dict in self.red_team_info.items():
244
+ red_team_info_logged[strategy] = {}
245
+ for harm, info_dict in harms_dict.items():
246
+ info_dict.pop("evaluation_result", None)
247
+ red_team_info_logged[strategy][harm] = info_dict
248
+ f.write(json.dumps(red_team_info_logged))
249
+
250
+ # Also save a human-readable scorecard if available
251
+ if not data_only and redteam_output.scan_result:
252
+ scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
253
+ with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
254
+ f.write(self._to_scorecard(redteam_output.scan_result))
255
+ self.logger.debug(f"Saved scorecard to: {scorecard_path}")
256
+
257
+ # Create a dedicated artifacts directory with proper structure for MLFlow
258
+ # MLFlow requires the artifact_name file to be in the directory we're logging
259
+
260
+ import tempfile
261
+ with tempfile.TemporaryDirectory() as tmpdir:
262
+ # First, create the main artifact file that MLFlow expects
263
+ with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
264
+ if data_only:
265
+ f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
266
+ elif redteam_output.scan_result:
267
+ redteam_output.scan_result["redteaming_scorecard"] = redteam_output.scan_result.get("scorecard", None)
268
+ redteam_output.scan_result["redteaming_parameters"] = redteam_output.scan_result.get("parameters", None)
269
+ redteam_output.scan_result["redteaming_data"] = redteam_output.scan_result.get("attack_details", None)
270
+
271
+ json.dump(redteam_output.scan_result, f)
272
+
273
+ # Copy all relevant files to the temp directory
274
+ import shutil
275
+ for file in os.listdir(self.scan_output_dir):
276
+ file_path = os.path.join(self.scan_output_dir, file)
277
+
278
+ # Skip directories and log files if not in debug mode
279
+ if os.path.isdir(file_path):
280
+ continue
281
+ if file.endswith('.log') and not os.environ.get('DEBUG'):
282
+ continue
283
+ if file == artifact_name or file == eval_info_name:
284
+ continue
285
+
286
+ try:
287
+ shutil.copy(file_path, os.path.join(tmpdir, file))
288
+ self.logger.debug(f"Copied file to artifact directory: {file}")
289
+ except Exception as e:
290
+ self.logger.warning(f"Failed to copy file {file} to artifact directory: {str(e)}")
291
+
292
+ # Log the entire directory to MLFlow
293
+ try:
294
+ eval_run.log_artifact(tmpdir, artifact_name)
295
+ eval_run.log_artifact(tmpdir, eval_info_name)
296
+ self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
297
+ except Exception as e:
298
+ self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
299
+
300
+ # Also log a direct property to capture the scan output directory
301
+ try:
302
+ eval_run.write_properties_to_run_history({"scan_output_dir": str(self.scan_output_dir)})
303
+ self.logger.debug("Logged scan_output_dir property to MLFlow")
304
+ except Exception as e:
305
+ self.logger.warning(f"Failed to log scan_output_dir property to MLFlow: {str(e)}")
306
+ else:
307
+ # Use temporary directory as before if no scan output directory exists
308
+ with tempfile.TemporaryDirectory() as tmpdir:
309
+ artifact_file = Path(tmpdir) / artifact_name
310
+ with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
311
+ if data_only:
312
+ f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
313
+ elif redteam_output.scan_result:
314
+ json.dump(redteam_output.scan_result, f)
315
+ eval_run.log_artifact(tmpdir, artifact_name)
316
+ self.logger.debug(f"Logged artifact: {artifact_name}")
317
+
318
+ eval_run.write_properties_to_run_history({
319
+ EvaluationRunProperties.RUN_TYPE: "eval_run",
320
+ "redteaming": "asr", # Red team agent specific run properties to help UI identify this as a redteaming run
321
+ EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
322
+ "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
323
+ })
324
+
325
+ if redteam_output.scan_result:
326
+ scorecard = redteam_output.scan_result["scorecard"]
327
+ joint_attack_summary = scorecard["joint_risk_attack_summary"]
328
+
329
+ if joint_attack_summary:
330
+ for risk_category_summary in joint_attack_summary:
331
+ risk_category = risk_category_summary.get("risk_category").lower()
332
+ for key, value in risk_category_summary.items():
333
+ if key != "risk_category":
334
+ eval_run.log_metric(f"{risk_category}_{key}", cast(float, value))
335
+ self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
336
+
337
+ self.logger.info("Successfully logged results to MLFlow")
338
+ return None
339
+
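# Illustrative sketch (editor's addition, not part of the diffed package source):
# the approximate shape of the self.red_team_info tracking dictionary that the
# method above serializes to redteam_info.json ("evaluation_result" is popped
# before logging). Key names are inferred from how they are read elsewhere in
# this module; all concrete values are placeholders.
example_red_team_info = {
    "Base64Converter": {                      # attack strategy name (placeholder)
        "violence": {                         # risk category (placeholder)
            "data_file": "<path to PyRIT output data file>",
            "status": "<TASK_STATUS value>",
            # "evaluation_result": {...}      # removed before logging
        },
    },
}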
340
+ # Using the utility function from strategy_utils.py instead
341
+ def _strategy_converter_map(self):
342
+ from ._utils.strategy_utils import strategy_converter_map
343
+ return strategy_converter_map()
344
+
345
+ async def _get_attack_objectives(
346
+ self,
347
+ risk_category: Optional[RiskCategory] = None, # Now accepting a single risk category
348
+ application_scenario: Optional[str] = None,
349
+ strategy: Optional[str] = None
350
+ ) -> List[str]:
351
+ """Get attack objectives from the RAI client for a specific risk category or from a custom dataset.
352
+
353
+ :param attack_objective_generator: The generator with risk categories to get attack objectives for
354
+ :type attack_objective_generator: ~azure.ai.evaluation.redteam._AttackObjectiveGenerator
355
+ :param risk_category: The specific risk category to get objectives for
356
+ :type risk_category: Optional[RiskCategory]
357
+ :param application_scenario: Optional description of the application scenario for context
358
+ :type application_scenario: str
359
+ :param strategy: Optional attack strategy to get specific objectives for
360
+ :type strategy: str
361
+ :return: A list of attack objective prompts
362
+ :rtype: List[str]
363
+ """
364
+ attack_objective_generator = self.attack_objective_generator
365
+ # TODO: is this necessary?
366
+ if not risk_category:
367
+ self.logger.warning("No risk category provided, using the first category from the generator")
368
+ risk_category = attack_objective_generator.risk_categories[0] if attack_objective_generator.risk_categories else None
369
+ if not risk_category:
370
+ self.logger.error("No risk categories found in generator")
371
+ return []
372
+
373
+ # Convert risk category to lowercase for consistent caching
374
+ risk_cat_value = risk_category.value.lower()
375
+ num_objectives = attack_objective_generator.num_objectives
376
+
377
+ log_subsection_header(self.logger, f"Getting attack objectives for {risk_cat_value}, strategy: {strategy}")
378
+
379
+ # Check if we already have baseline objectives for this risk category
380
+ baseline_key = ((risk_cat_value,), "baseline")
381
+ baseline_objectives_exist = baseline_key in self.attack_objectives
382
+ current_key = ((risk_cat_value,), strategy)
383
+
384
+ # Check if custom attack seed prompts are provided in the generator
385
+ if attack_objective_generator.custom_attack_seed_prompts and attack_objective_generator.validated_prompts:
386
+ self.logger.info(f"Using custom attack seed prompts from {attack_objective_generator.custom_attack_seed_prompts}")
387
+
388
+ # Get the prompts for this risk category
389
+ custom_objectives = attack_objective_generator.valid_prompts_by_category.get(risk_cat_value, [])
390
+
391
+ if not custom_objectives:
392
+ self.logger.warning(f"No custom objectives found for risk category {risk_cat_value}")
393
+ return []
394
+
395
+ self.logger.info(f"Found {len(custom_objectives)} custom objectives for {risk_cat_value}")
396
+
397
+ # Sample if we have more than needed
398
+ if len(custom_objectives) > num_objectives:
399
+ selected_cat_objectives = random.sample(custom_objectives, num_objectives)
400
+ self.logger.info(f"Sampled {num_objectives} objectives from {len(custom_objectives)} available for {risk_cat_value}")
401
+ # Log ids of selected objectives for traceability
402
+ selected_ids = [obj.get("id", "unknown-id") for obj in selected_cat_objectives]
403
+ self.logger.debug(f"Selected objective IDs for {risk_cat_value}: {selected_ids}")
404
+ else:
405
+ selected_cat_objectives = custom_objectives
406
+ self.logger.info(f"Using all {len(custom_objectives)} available objectives for {risk_cat_value}")
407
+
408
+ # Handle jailbreak strategy - need to apply jailbreak prefixes to messages
409
+ if strategy == "jailbreak":
410
+ self.logger.debug("Applying jailbreak prefixes to custom objectives")
411
+ try:
412
+ jailbreak_prefixes = await self.generated_rai_client.get_jailbreak_prefixes()
413
+ for objective in selected_cat_objectives:
414
+ if "messages" in objective and len(objective["messages"]) > 0:
415
+ message = objective["messages"][0]
416
+ if isinstance(message, dict) and "content" in message:
417
+ message["content"] = f"{random.choice(jailbreak_prefixes)} {message['content']}"
418
+ except Exception as e:
419
+ log_error(self.logger, "Error applying jailbreak prefixes to custom objectives", e)
420
+ # Continue with unmodified prompts instead of failing completely
421
+
422
+ # Extract content from selected objectives
423
+ selected_prompts = []
424
+ for obj in selected_cat_objectives:
425
+ if "messages" in obj and len(obj["messages"]) > 0:
426
+ message = obj["messages"][0]
427
+ if isinstance(message, dict) and "content" in message:
428
+ selected_prompts.append(message["content"])
429
+
430
+ # Process the selected objectives for caching
431
+ objectives_by_category = {risk_cat_value: []}
432
+
433
+ for obj in selected_cat_objectives:
434
+ obj_id = obj.get("id", f"obj-{uuid.uuid4()}")
435
+ target_harms = obj.get("metadata", {}).get("target_harms", [])
436
+ content = ""
437
+ if "messages" in obj and len(obj["messages"]) > 0:
438
+ content = obj["messages"][0].get("content", "")
439
+
440
+ if not content:
441
+ continue
442
+
443
+ obj_data = {
444
+ "id": obj_id,
445
+ "content": content
446
+ }
447
+ objectives_by_category[risk_cat_value].append(obj_data)
448
+
449
+ # Store in cache
450
+ self.attack_objectives[current_key] = {
451
+ "objectives_by_category": objectives_by_category,
452
+ "strategy": strategy,
453
+ "risk_category": risk_cat_value,
454
+ "selected_prompts": selected_prompts,
455
+ "selected_objectives": selected_cat_objectives
456
+ }
457
+
458
+ self.logger.info(f"Using {len(selected_prompts)} custom objectives for {risk_cat_value}")
459
+ return selected_prompts
460
+
461
+ else:
462
+ # Use the RAI service to get attack objectives
463
+ try:
464
+ self.logger.debug(f"API call: get_attack_objectives({risk_cat_value}, app: {application_scenario}, strategy: {strategy})")
465
+ # strategy param specifies whether to get a strategy-specific dataset from the RAI service
466
+ # right now, only tense requires strategy-specific dataset
467
+ if "tense" in strategy:
468
+ objectives_response = await self.generated_rai_client.get_attack_objectives(
469
+ risk_category=risk_cat_value,
470
+ application_scenario=application_scenario or "",
471
+ strategy="tense"
472
+ )
473
+ else:
474
+ objectives_response = await self.generated_rai_client.get_attack_objectives(
475
+ risk_category=risk_cat_value,
476
+ application_scenario=application_scenario or "",
477
+ strategy=None
478
+ )
479
+ if isinstance(objectives_response, list):
480
+ self.logger.debug(f"API returned {len(objectives_response)} objectives")
481
+ else:
482
+ self.logger.debug(f"API returned response of type: {type(objectives_response)}")
483
+
484
+ # Handle jailbreak strategy - need to apply jailbreak prefixes to messages
485
+ if strategy == "jailbreak":
486
+ self.logger.debug("Applying jailbreak prefixes to objectives")
487
+ jailbreak_prefixes = await self.generated_rai_client.get_jailbreak_prefixes()
488
+ for objective in objectives_response:
489
+ if "messages" in objective and len(objective["messages"]) > 0:
490
+ message = objective["messages"][0]
491
+ if isinstance(message, dict) and "content" in message:
492
+ message["content"] = f"{random.choice(jailbreak_prefixes)} {message['content']}"
493
+ except Exception as e:
494
+ log_error(self.logger, "Error calling get_attack_objectives", e)
495
+ self.logger.warning("API call failed, returning empty objectives list")
496
+ return []
497
+
498
+ # Check if the response is valid
499
+ if not objectives_response or (isinstance(objectives_response, dict) and not objectives_response.get("objectives")):
500
+ self.logger.warning("Empty or invalid response, returning empty list")
501
+ return []
502
+
503
+ # For non-baseline strategies, filter by baseline IDs if they exist
504
+ if strategy != "baseline" and baseline_objectives_exist:
505
+ self.logger.debug(f"Found existing baseline objectives for {risk_cat_value}, will filter {strategy} by baseline IDs")
506
+ baseline_selected_objectives = self.attack_objectives[baseline_key].get("selected_objectives", [])
507
+ baseline_objective_ids = []
508
+
509
+ # Extract IDs from baseline objectives
510
+ for obj in baseline_selected_objectives:
511
+ if "id" in obj:
512
+ baseline_objective_ids.append(obj["id"])
513
+
514
+ if baseline_objective_ids:
515
+ self.logger.debug(f"Filtering by {len(baseline_objective_ids)} baseline objective IDs for {strategy}")
516
+
517
+ # Filter objectives by baseline IDs
518
+ selected_cat_objectives = []
519
+ for obj in objectives_response:
520
+ if obj.get("id") in baseline_objective_ids:
521
+ selected_cat_objectives.append(obj)
522
+
523
+ self.logger.debug(f"Found {len(selected_cat_objectives)} matching objectives with baseline IDs")
524
+ # If we couldn't find all the baseline IDs, log a warning
525
+ if len(selected_cat_objectives) < len(baseline_objective_ids):
526
+ self.logger.warning(f"Only found {len(selected_cat_objectives)} objectives matching baseline IDs, expected {len(baseline_objective_ids)}")
527
+ else:
528
+ self.logger.warning("No baseline objective IDs found, using random selection")
529
+ # If we don't have baseline IDs for some reason, default to random selection
530
+ if len(objectives_response) > num_objectives:
531
+ selected_cat_objectives = random.sample(objectives_response, num_objectives)
532
+ else:
533
+ selected_cat_objectives = objectives_response
534
+ else:
535
+ # This is the baseline strategy or we don't have baseline objectives yet
536
+ self.logger.debug(f"Using random selection for {strategy} strategy")
537
+ if len(objectives_response) > num_objectives:
538
+ self.logger.debug(f"Selecting {num_objectives} objectives from {len(objectives_response)} available")
539
+ selected_cat_objectives = random.sample(objectives_response, num_objectives)
540
+ else:
541
+ selected_cat_objectives = objectives_response
542
+
543
+ if len(selected_cat_objectives) < num_objectives:
544
+ self.logger.warning(f"Only found {len(selected_cat_objectives)} objectives for {risk_cat_value}, fewer than requested {num_objectives}")
545
+
546
+ # Extract content from selected objectives
547
+ selected_prompts = []
548
+ for obj in selected_cat_objectives:
549
+ if "messages" in obj and len(obj["messages"]) > 0:
550
+ message = obj["messages"][0]
551
+ if isinstance(message, dict) and "content" in message:
552
+ selected_prompts.append(message["content"])
553
+
554
+ # Process the response - organize by category and extract content/IDs
555
+ objectives_by_category = {risk_cat_value: []}
556
+
557
+ # Process list format and organize by category for caching
558
+ for obj in selected_cat_objectives:
559
+ obj_id = obj.get("id", f"obj-{uuid.uuid4()}")
560
+ target_harms = obj.get("metadata", {}).get("target_harms", [])
561
+ content = ""
562
+ if "messages" in obj and len(obj["messages"]) > 0:
563
+ content = obj["messages"][0].get("content", "")
564
+
565
+ if not content:
566
+ continue
567
+ if target_harms:
568
+ for harm in target_harms:
569
+ obj_data = {
570
+ "id": obj_id,
571
+ "content": content
572
+ }
573
+ objectives_by_category[risk_cat_value].append(obj_data)
574
+ break # Just use the first harm for categorization
575
+
576
+ # Store in cache - now including the full selected objectives with IDs
577
+ self.attack_objectives[current_key] = {
578
+ "objectives_by_category": objectives_by_category,
579
+ "strategy": strategy,
580
+ "risk_category": risk_cat_value,
581
+ "selected_prompts": selected_prompts,
582
+ "selected_objectives": selected_cat_objectives # Store full objects with IDs
583
+ }
584
+ self.logger.info(f"Selected {len(selected_prompts)} objectives for {risk_cat_value}")
585
+
586
+ return selected_prompts
587
+
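# Illustrative sketch (editor's addition, not part of the diffed package source):
# the shape of one self.attack_objectives cache entry produced by
# _get_attack_objectives above. The key pairs a tuple of risk-category values
# with the strategy name; all concrete values below are placeholders.
cache_key = (("violence",), "baseline")
cache_entry = {
    "objectives_by_category": {
        "violence": [{"id": "<objective-id>", "content": "<prompt text>"}],
    },
    "strategy": "baseline",
    "risk_category": "violence",
    "selected_prompts": ["<prompt text>"],
    "selected_objectives": [
        {"id": "<objective-id>", "messages": [{"content": "<prompt text>"}]},
    ],
}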
588
+ # Replace with utility function
589
+ def _message_to_dict(self, message: ChatMessage):
590
+ from ._utils.formatting_utils import message_to_dict
591
+ return message_to_dict(message)
592
+
593
+ # Replace with utility function
594
+ def _get_strategy_name(self, attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
595
+ from ._utils.formatting_utils import get_strategy_name
596
+ return get_strategy_name(attack_strategy)
597
+
598
+ # Replace with utility function
599
+ def _get_flattened_attack_strategies(self, attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]) -> List[Union[AttackStrategy, List[AttackStrategy]]]:
600
+ from ._utils.formatting_utils import get_flattened_attack_strategies
601
+ return get_flattened_attack_strategies(attack_strategies)
602
+
603
+ # Replace with utility function
604
+ def _get_converter_for_strategy(self, attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> Union[PromptConverter, List[PromptConverter]]:
605
+ from ._utils.strategy_utils import get_converter_for_strategy
606
+ return get_converter_for_strategy(attack_strategy)
607
+
608
+ async def _prompt_sending_orchestrator(
609
+ self,
610
+ chat_target: PromptChatTarget,
611
+ all_prompts: List[str],
612
+ converter: Union[PromptConverter, List[PromptConverter]],
613
+ strategy_name: str = "unknown",
614
+ risk_category: str = "unknown",
615
+ timeout: int = 120
616
+ ) -> Orchestrator:
617
+ """Send prompts via the PromptSendingOrchestrator with optimized performance.
618
+
619
+ :param chat_target: The target to send prompts to
620
+ :type chat_target: PromptChatTarget
621
+ :param all_prompts: List of prompts to send
622
+ :type all_prompts: List[str]
623
+ :param converter: Converter or list of converters to use for prompt transformation
624
+ :type converter: Union[PromptConverter, List[PromptConverter]]
625
+ :param strategy_name: Name of the strategy being used (for logging)
626
+ :type strategy_name: str
627
+ :param risk_category: Name of the risk category being evaluated (for logging)
628
+ :type risk_category: str
629
+ :param timeout: The timeout in seconds for API calls
630
+ :type timeout: int
631
+ :return: The orchestrator instance with processed results
632
+ :rtype: Orchestrator
633
+ """
634
+ task_key = f"{strategy_name}_{risk_category}_orchestrator"
635
+ self.task_statuses[task_key] = TASK_STATUS["RUNNING"]
636
+
637
+ log_strategy_start(self.logger, strategy_name, risk_category)
638
+
639
+ # Create converter list from single converter or list of converters
640
+ converter_list = [converter] if converter and isinstance(converter, PromptConverter) else converter if converter else []
641
+
642
+ # Log which converter is being used
643
+ if converter_list:
644
+ if isinstance(converter_list, list) and len(converter_list) > 0:
645
+ converter_names = [c.__class__.__name__ for c in converter_list if c is not None]
646
+ self.logger.debug(f"Using converters: {', '.join(converter_names)}")
647
+ elif converter is not None:
648
+ self.logger.debug(f"Using converter: {converter.__class__.__name__}")
649
+ else:
650
+ self.logger.debug("No converters specified")
651
+
652
+ # Optimized orchestrator initialization
653
+ try:
654
+ orchestrator = PromptSendingOrchestrator(
655
+ objective_target=chat_target,
656
+ prompt_converters=converter_list
657
+ )
658
+
659
+ if not all_prompts:
660
+ self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{risk_category}")
661
+ self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
662
+ return orchestrator
663
+
664
+ # Debug log the first few characters of each prompt
665
+ self.logger.debug(f"First prompt (truncated): {all_prompts[0][:50]}...")
666
+
667
+ # Use a batched approach for send_prompts_async to prevent overwhelming
668
+ # the model with too many concurrent requests
669
+ batch_size = min(len(all_prompts), 3) # Process 3 prompts at a time max
670
+
671
+ # Process prompts concurrently within each batch
672
+ if len(all_prompts) > batch_size:
673
+ self.logger.debug(f"Processing {len(all_prompts)} prompts in batches of {batch_size} for {strategy_name}/{risk_category}")
674
+ batches = [all_prompts[i:i + batch_size] for i in range(0, len(all_prompts), batch_size)]
675
+
676
+ for batch_idx, batch in enumerate(batches):
677
+ self.logger.debug(f"Processing batch {batch_idx+1}/{len(batches)} with {len(batch)} prompts for {strategy_name}/{risk_category}")
678
+
679
+ batch_start_time = datetime.now()
680
+ # Send prompts in the batch concurrently with a timeout
681
+ try:
682
+ # Use wait_for to implement a timeout
683
+ await asyncio.wait_for(
684
+ orchestrator.send_prompts_async(prompt_list=batch),
685
+ timeout=timeout # Use provided timeout
686
+ )
687
+ batch_duration = (datetime.now() - batch_start_time).total_seconds()
688
+ self.logger.debug(f"Successfully processed batch {batch_idx+1} for {strategy_name}/{risk_category} in {batch_duration:.2f} seconds")
689
+
690
+ # Print progress to console
691
+ if batch_idx < len(batches) - 1: # Don't print for the last batch
692
+ print(f"Strategy {strategy_name}, Risk {risk_category}: Processed batch {batch_idx+1}/{len(batches)}")
693
+
694
+ except asyncio.TimeoutError:
695
+ self.logger.warning(f"Batch {batch_idx+1} for {strategy_name}/{risk_category} timed out after {timeout} seconds, continuing with partial results")
696
+ self.logger.debug(f"Timeout: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1} after {timeout} seconds.", exc_info=True)
697
+ print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1}")
698
+ # Set task status to TIMEOUT
699
+ batch_task_key = f"{strategy_name}_{risk_category}_batch_{batch_idx+1}"
700
+ self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
701
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
702
+ # Continue with partial results rather than failing completely
703
+ continue
704
+ except Exception as e:
705
+ log_error(self.logger, f"Error processing batch {batch_idx+1}", e, f"{strategy_name}/{risk_category}")
706
+ self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1}: {str(e)}")
707
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
708
+ # Continue with other batches even if one fails
709
+ continue
710
+ else:
711
+ # Small number of prompts, process all at once with a timeout
712
+ self.logger.debug(f"Processing {len(all_prompts)} prompts in a single batch for {strategy_name}/{risk_category}")
713
+ batch_start_time = datetime.now()
714
+ try:
715
+ await asyncio.wait_for(
716
+ orchestrator.send_prompts_async(prompt_list=all_prompts),
717
+ timeout=timeout # Use provided timeout
718
+ )
719
+ batch_duration = (datetime.now() - batch_start_time).total_seconds()
720
+ self.logger.debug(f"Successfully processed single batch for {strategy_name}/{risk_category} in {batch_duration:.2f} seconds")
721
+ except asyncio.TimeoutError:
722
+ self.logger.warning(f"Prompt processing for {strategy_name}/{risk_category} timed out after {timeout} seconds, continuing with partial results")
723
+ print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category}")
724
+ # Set task status to TIMEOUT
725
+ single_batch_task_key = f"{strategy_name}_{risk_category}_single_batch"
726
+ self.task_statuses[single_batch_task_key] = TASK_STATUS["TIMEOUT"]
727
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
728
+ except Exception as e:
729
+ log_error(self.logger, "Error processing prompts", e, f"{strategy_name}/{risk_category}")
730
+ self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}: {str(e)}")
731
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
732
+
733
+ self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
734
+ return orchestrator
735
+
736
+ except Exception as e:
737
+ log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{risk_category}")
738
+ self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category}: {str(e)}")
739
+ self.task_statuses[task_key] = TASK_STATUS["FAILED"]
740
+ raise
741
+
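# Illustrative sketch (editor's addition, not part of the diffed package source):
# the batching pattern used by _prompt_sending_orchestrator above, reduced to
# its core. Prompts are split into batches of at most three, and each batch is
# bounded by asyncio.wait_for so a slow batch yields partial results instead of
# stalling the whole strategy. send_batch stands in for
# orchestrator.send_prompts_async.
import asyncio
from typing import Awaitable, Callable, List

async def send_in_batches(
    prompts: List[str],
    send_batch: Callable[[List[str]], Awaitable[None]],
    timeout: int = 120,
) -> None:
    if not prompts:
        return
    batch_size = min(len(prompts), 3)  # at most 3 prompts in flight per batch
    batches = [prompts[i:i + batch_size] for i in range(0, len(prompts), batch_size)]
    for batch in batches:
        try:
            await asyncio.wait_for(send_batch(batch), timeout=timeout)
        except asyncio.TimeoutError:
            continue  # keep going with partial results, mirroring the code above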
742
+ def _write_pyrit_outputs_to_file(self, orchestrator: Orchestrator) -> str:
743
+ """Write PyRIT outputs to a file with a name based on orchestrator, converter, and risk category.
744
+
745
+ :param orchestrator: The orchestrator that generated the outputs
746
+ :type orchestrator: Orchestrator
747
+ :return: Path to the output file
748
+ :rtype: Union[str, os.PathLike]
749
+ """
750
+ base_path = str(uuid.uuid4())
751
+
752
+ # If scan output directory exists, place the file there
753
+ if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
754
+ output_path = os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
755
+ else:
756
+ output_path = f"{base_path}{DATA_EXT}"
757
+
758
+ self.logger.debug(f"Writing PyRIT outputs to file: {output_path}")
759
+
760
+ memory = orchestrator.get_memory()
761
+
762
+ # Get conversations as a List[List[ChatMessage]]
763
+ conversations = [[item.to_chat_message() for item in group] for conv_id, group in itertools.groupby(memory, key=lambda x: x.conversation_id)]
764
+
765
+ #Convert to json lines
766
+ json_lines = ""
767
+ for conversation in conversations: # each conversation is a List[ChatMessage]
768
+ json_lines += json.dumps({"conversation": {"messages": [self._message_to_dict(message) for message in conversation]}}) + "\n"
769
+
770
+ with Path(output_path).open("w") as f:
771
+ f.writelines(json_lines)
772
+
773
+ orchestrator.dispose_db_engine()
774
+ self.logger.debug(f"Successfully wrote {len(conversations)} conversations to {output_path}")
775
+ return str(output_path)
776
+
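# Illustrative sketch (editor's addition, not part of the diffed package source):
# one line of the data file written by the method above. Each conversation is
# serialized as a single JSON object with a conversation.messages list; the
# role/content message shape is an assumption, since the real keys come from
# formatting_utils.message_to_dict, which is not shown in this diff.
import json

example_line = json.dumps({
    "conversation": {
        "messages": [
            {"role": "user", "content": "<attack prompt>"},
            {"role": "assistant", "content": "<target response>"},
        ]
    }
})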
777
+ # Replace with utility function
778
+ def _get_chat_target(self, target: Union[PromptChatTarget,Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration]) -> PromptChatTarget:
779
+ from ._utils.strategy_utils import get_chat_target
780
+ return get_chat_target(target)
781
+
782
+ # Replace with utility function
783
+ def _get_orchestrators_for_attack_strategies(self, attack_strategy: List[Union[AttackStrategy, List[AttackStrategy]]]) -> List[Callable]:
784
+ # We need to modify this to use our actual _prompt_sending_orchestrator since the utility function can't access it
785
+ call_to_orchestrators = []
786
+ # Sending PromptSendingOrchestrator for each complexity level
787
+ if AttackStrategy.EASY in attack_strategy:
788
+ call_to_orchestrators.extend([self._prompt_sending_orchestrator])
789
+ elif AttackStrategy.MODERATE in attack_strategy:
790
+ call_to_orchestrators.extend([self._prompt_sending_orchestrator])
791
+ elif AttackStrategy.DIFFICULT in attack_strategy:
792
+ call_to_orchestrators.extend([self._prompt_sending_orchestrator])
793
+ else:
794
+ call_to_orchestrators.extend([self._prompt_sending_orchestrator])
795
+ return call_to_orchestrators
796
+
797
+ # Replace with utility function
798
+ def _get_attack_success(self, result: str) -> bool:
799
+ from ._utils.formatting_utils import get_attack_success
800
+ return get_attack_success(result)
801
+
802
+ def _to_red_team_result(self) -> RedTeamResult:
803
+ """Convert tracking data from red_team_info to the RedTeamResult format.
804
+
805
+ Uses only the red_team_info tracking dictionary to build the RedTeamResult.
806
+
807
+ :return: Structured red team agent results
808
+ :rtype: RedTeamResult
809
+ """
810
+ converters = []
811
+ complexity_levels = []
812
+ risk_categories = []
813
+ attack_successes = [] # unified list for all attack successes
814
+ conversations = []
815
+
816
+ # Create a CSV summary file for attack data in the scan output directory if available
817
+ if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
818
+ summary_file = os.path.join(self.scan_output_dir, "attack_summary.csv")
819
+ self.logger.debug(f"Creating attack summary CSV file: {summary_file}")
820
+
821
+ self.logger.info(f"Building RedTeamResult from red_team_info with {len(self.red_team_info)} strategies")
822
+
823
+ # Process each strategy and risk category from red_team_info
824
+ for strategy_name, risk_data in self.red_team_info.items():
825
+ self.logger.info(f"Processing results for strategy: {strategy_name}")
826
+
827
+ # Determine complexity level for this strategy
828
+ if "Baseline" in strategy_name:
829
+ complexity_level = "baseline"
830
+ else:
831
+ # Try to map strategy name to complexity level
832
+ # Default is difficult since we assume it's a composed strategy
833
+ complexity_level = ATTACK_STRATEGY_COMPLEXITY_MAP.get(strategy_name, "difficult")
834
+
835
+ for risk_category, data in risk_data.items():
836
+ self.logger.info(f"Processing data for {risk_category} in strategy {strategy_name}")
837
+
838
+ data_file = data.get("data_file", "")
839
+ eval_result = data.get("evaluation_result")
840
+
841
+ # Process data file to extract conversations
842
+ if data_file and os.path.exists(data_file):
843
+ try:
844
+ with open(data_file, "r") as f:
845
+ for line in f:
846
+ try:
847
+ conv_data = json.loads(line)
848
+ if "conversation" in conv_data and "messages" in conv_data["conversation"]:
849
+ messages = conv_data["conversation"]["messages"]
850
+
851
+ # Determine attack success based on evaluation results if available
852
+ attack_success = None
853
+ risk_assessment = {}
854
+
855
+ # If we have evaluation results, try to extract attack success and risk assessment
856
+ if eval_result:
857
+ # Look for this conversation in the evaluation results
858
+ rows = eval_result.get("rows", [])
859
+ for r in rows:
860
+ if r.get("inputs.conversation", {}).get("messages") == messages:
861
+ # Found matching conversation
862
+ if f"outputs.{risk_category}.{risk_category}_result" in r:
863
+ attack_success = self._get_attack_success(r[f"outputs.{risk_category}.{risk_category}_result"])
864
+
865
+ # Extract risk assessments for all categories
866
+ for risk in self.risk_categories:
867
+ risk_value = risk.value
868
+ if f"outputs.{risk_value}.{risk_value}" in r and f"outputs.{risk_value}.{risk_value}_reason" in r:
869
+ risk_assessment[risk_value] = {
870
+ "severity_label": r[f"outputs.{risk_value}.{risk_value}"],
871
+ "reason": r[f"outputs.{risk_value}.{risk_value}_reason"]
872
+ }
873
+
874
+ # Add to tracking arrays for statistical analysis
875
+ converters.append(strategy_name)
876
+ complexity_levels.append(complexity_level)
877
+ risk_categories.append(risk_category)
878
+
879
+ if attack_success is not None:
880
+ attack_successes.append(1 if attack_success else 0)
881
+ else:
882
+ attack_successes.append(None)
883
+
884
+ # Add conversation object
885
+ conversation = {
886
+ "attack_success": attack_success,
887
+ "attack_technique": strategy_name.replace("Converter", "").replace("Prompt", ""),
888
+ "attack_complexity": complexity_level,
889
+ "risk_category": risk_category,
890
+ "conversation": messages,
891
+ "risk_assessment": risk_assessment if risk_assessment else None
892
+ }
893
+ conversations.append(conversation)
894
+ except json.JSONDecodeError as e:
895
+ self.logger.error(f"Error parsing JSON in data file {data_file}: {e}")
896
+ except Exception as e:
897
+ self.logger.error(f"Error processing data file {data_file}: {e}")
898
+ else:
899
+ self.logger.warning(f"Data file {data_file} not found or not specified for {strategy_name}/{risk_category}")
900
+
901
+ # Sort conversations by attack technique for better readability
902
+ conversations.sort(key=lambda x: x["attack_technique"])
903
+
904
+ self.logger.info(f"Processed {len(conversations)} conversations from all data files")
905
+
906
+ # Create a DataFrame for analysis - with unified structure
907
+ results_dict = {
908
+ "converter": converters,
909
+ "complexity_level": complexity_levels,
910
+ "risk_category": risk_categories,
911
+ }
912
+
913
+ # Only include attack_success if we have evaluation results
914
+ if any(success is not None for success in attack_successes):
915
+ results_dict["attack_success"] = [math.nan if success is None else success for success in attack_successes]
916
+ self.logger.info(f"Including attack success data for {sum(1 for s in attack_successes if s is not None)} conversations")
917
+
918
+ results_df = pd.DataFrame.from_dict(results_dict)
919
+
920
+ if "attack_success" not in results_df.columns or results_df.empty:
921
+ # If we don't have evaluation results or the DataFrame is empty, create a default scorecard
922
+ self.logger.info("No evaluation results available or no data found, creating default scorecard")
923
+
924
+ # Create a basic scorecard structure
925
+ scorecard = {
926
+ "risk_category_summary": [{"overall_asr": 0.0, "overall_total": len(conversations), "overall_attack_successes": 0}],
927
+ "attack_technique_summary": [{"overall_asr": 0.0, "overall_total": len(conversations), "overall_attack_successes": 0}],
928
+ "joint_risk_attack_summary": [],
929
+ "detailed_joint_risk_attack_asr": {}
930
+ }
931
+
932
+ # Create basic parameters
933
+ redteaming_parameters = {
934
+ "attack_objective_generated_from": {
935
+ "application_scenario": self.application_scenario,
936
+ "risk_categories": [risk.value for risk in self.risk_categories],
937
+ "custom_attack_seed_prompts": "",
938
+ "policy_document": ""
939
+ },
940
+ "attack_complexity": list(set(complexity_levels)) if complexity_levels else ["baseline", "easy"],
941
+ "techniques_used": {}
942
+ }
943
+
944
+ for complexity in set(complexity_levels) if complexity_levels else ["baseline", "easy"]:
945
+ complexity_converters = [conv for i, conv in enumerate(converters) if i < len(complexity_levels) and complexity_levels[i] == complexity]
946
+ redteaming_parameters["techniques_used"][complexity] = list(set(complexity_converters)) if complexity_converters else []
947
+ else:
948
+ # Calculate risk category summaries by aggregating on risk category
949
+ risk_category_groups = results_df.groupby("risk_category")
950
+ risk_category_summary = {}
951
+
952
+ # Overall metrics across all categories
953
+ try:
954
+ overall_asr = round(list_mean_nan_safe(results_df["attack_success"].tolist()) * 100, 2) if "attack_success" in results_df.columns else 0.0
955
+ except EvaluationException:
956
+ self.logger.debug("All values in overall attack success array were None or NaN, setting ASR to NaN")
957
+ overall_asr = math.nan
958
+ overall_total = len(results_df)
959
+ overall_successful_attacks = sum([s for s in results_df["attack_success"].tolist() if not is_none_or_nan(s)]) if "attack_success" in results_df.columns else 0
960
+
961
+ risk_category_summary.update({
962
+ "overall_asr": overall_asr,
963
+ "overall_total": overall_total,
964
+ "overall_attack_successes": int(overall_successful_attacks)
965
+ })
966
+
967
+ # Per-risk category metrics
968
+ for risk, group in risk_category_groups:
969
+ try:
970
+ asr = round(list_mean_nan_safe(group["attack_success"].tolist()) * 100, 2) if "attack_success" in group.columns else 0.0
971
+ except EvaluationException:
972
+ self.logger.debug(f"All values in attack success array for {risk} were None or NaN, setting ASR to NaN")
973
+ asr = math.nan
974
+ total = len(group)
975
+ successful_attacks = sum([s for s in group["attack_success"].tolist() if not is_none_or_nan(s)]) if "attack_success" in group.columns else 0
976
+
977
+ risk_category_summary.update({
978
+ f"{risk}_asr": asr,
979
+ f"{risk}_total": total,
980
+ f"{risk}_successful_attacks": int(successful_attacks)
981
+ })
982
+
983
+ # Calculate attack technique summaries by complexity level
984
+ # First, create masks for each complexity level
985
+ baseline_mask = results_df["complexity_level"] == "baseline"
986
+ easy_mask = results_df["complexity_level"] == "easy"
987
+ moderate_mask = results_df["complexity_level"] == "moderate"
988
+ difficult_mask = results_df["complexity_level"] == "difficult"
989
+
990
+ # Then calculate metrics for each complexity level
991
+ attack_technique_summary_dict = {}
992
+
993
+ # Baseline metrics
994
+ baseline_df = results_df[baseline_mask]
995
+ if not baseline_df.empty:
996
+ try:
997
+ baseline_asr = round(list_mean_nan_safe(baseline_df["attack_success"].tolist()) * 100, 2) if "attack_success" in baseline_df.columns else 0.0
998
+ except EvaluationException:
999
+ self.logger.debug("All values in baseline attack success array were None or NaN, setting ASR to NaN")
1000
+ baseline_asr = math.nan
1001
+ attack_technique_summary_dict.update({
1002
+ "baseline_asr": baseline_asr,
1003
+ "baseline_total": len(baseline_df),
1004
+ "baseline_attack_successes": sum([s for s in baseline_df["attack_success"].tolist() if not is_none_or_nan(s)]) if "attack_success" in baseline_df.columns else 0
1005
+ })
1006
+
1007
+ # Easy complexity metrics
1008
+ easy_df = results_df[easy_mask]
1009
+ if not easy_df.empty:
1010
+ try:
1011
+ easy_complexity_asr = round(list_mean_nan_safe(easy_df["attack_success"].tolist()) * 100, 2) if "attack_success" in easy_df.columns else 0.0
1012
+ except EvaluationException:
1013
+ self.logger.debug("All values in easy complexity attack success array were None or NaN, setting ASR to NaN")
1014
+ easy_complexity_asr = math.nan
1015
+ attack_technique_summary_dict.update({
1016
+ "easy_complexity_asr": easy_complexity_asr,
1017
+ "easy_complexity_total": len(easy_df),
1018
+ "easy_complexity_attack_successes": sum([s for s in easy_df["attack_success"].tolist() if not is_none_or_nan(s)]) if "attack_success" in easy_df.columns else 0
1019
+ })
1020
+
1021
+ # Moderate complexity metrics
1022
+ moderate_df = results_df[moderate_mask]
1023
+ if not moderate_df.empty:
1024
+ try:
1025
+ moderate_complexity_asr = round(list_mean_nan_safe(moderate_df["attack_success"].tolist()) * 100, 2) if "attack_success" in moderate_df.columns else 0.0
1026
+ except EvaluationException:
1027
+ self.logger.debug("All values in moderate complexity attack success array were None or NaN, setting ASR to NaN")
1028
+ moderate_complexity_asr = math.nan
1029
+ attack_technique_summary_dict.update({
1030
+ "moderate_complexity_asr": moderate_complexity_asr,
1031
+ "moderate_complexity_total": len(moderate_df),
1032
+ "moderate_complexity_attack_successes": sum([s for s in moderate_df["attack_success"].tolist() if not is_none_or_nan(s)]) if "attack_success" in moderate_df.columns else 0
1033
+ })
1034
+
1035
+ # Difficult complexity metrics
1036
+ difficult_df = results_df[difficult_mask]
1037
+ if not difficult_df.empty:
1038
+ try:
1039
+ difficult_complexity_asr = round(list_mean_nan_safe(difficult_df["attack_success"].tolist()) * 100, 2) if "attack_success" in difficult_df.columns else 0.0
1040
+ except EvaluationException:
1041
+ self.logger.debug("All values in difficult complexity attack success array were None or NaN, setting ASR to NaN")
1042
+ difficult_complexity_asr = math.nan
1043
+ attack_technique_summary_dict.update({
1044
+ "difficult_complexity_asr": difficult_complexity_asr,
1045
+ "difficult_complexity_total": len(difficult_df),
1046
+ "difficult_complexity_attack_successes": sum([s for s in difficult_df["attack_success"].tolist() if not is_none_or_nan(s)]) if "attack_success" in difficult_df.columns else 0
1047
+ })
1048
+
1049
+ # Overall metrics
1050
+ attack_technique_summary_dict.update({
1051
+ "overall_asr": overall_asr,
1052
+ "overall_total": overall_total,
1053
+ "overall_attack_successes": int(overall_successful_attacks)
1054
+ })
1055
+
1056
+ attack_technique_summary = [attack_technique_summary_dict]
1057
+
1058
+ # Create joint risk attack summary
1059
+ joint_risk_attack_summary = []
1060
+ unique_risks = results_df["risk_category"].unique()
1061
+
1062
+ for risk in unique_risks:
1063
+ risk_key = risk.replace("-", "_")
1064
+ risk_mask = results_df["risk_category"] == risk
1065
+
1066
+ joint_risk_dict = {"risk_category": risk_key}
1067
+
1068
+ # Baseline ASR for this risk
1069
+ baseline_risk_df = results_df[risk_mask & baseline_mask]
1070
+ if not baseline_risk_df.empty:
1071
+ try:
1072
+ joint_risk_dict["baseline_asr"] = round(list_mean_nan_safe(baseline_risk_df["attack_success"].tolist()) * 100, 2) if "attack_success" in baseline_risk_df.columns else 0.0
1073
+ except EvaluationException:
1074
+ self.logger.debug(f"All values in baseline attack success array for {risk_key} were None or NaN, setting ASR to NaN")
1075
+ joint_risk_dict["baseline_asr"] = math.nan
1076
+
1077
+ # Easy complexity ASR for this risk
1078
+ easy_risk_df = results_df[risk_mask & easy_mask]
1079
+ if not easy_risk_df.empty:
1080
+ try:
1081
+ joint_risk_dict["easy_complexity_asr"] = round(list_mean_nan_safe(easy_risk_df["attack_success"].tolist()) * 100, 2) if "attack_success" in easy_risk_df.columns else 0.0
1082
+ except EvaluationException:
1083
+ self.logger.debug(f"All values in easy complexity attack success array for {risk_key} were None or NaN, setting ASR to NaN")
1084
+ joint_risk_dict["easy_complexity_asr"] = math.nan
1085
+
1086
+ # Moderate complexity ASR for this risk
1087
+ moderate_risk_df = results_df[risk_mask & moderate_mask]
1088
+ if not moderate_risk_df.empty:
1089
+ try:
1090
+ joint_risk_dict["moderate_complexity_asr"] = round(list_mean_nan_safe(moderate_risk_df["attack_success"].tolist()) * 100, 2) if "attack_success" in moderate_risk_df.columns else 0.0
1091
+ except EvaluationException:
1092
+ self.logger.debug(f"All values in moderate complexity attack success array for {risk_key} were None or NaN, setting ASR to NaN")
1093
+ joint_risk_dict["moderate_complexity_asr"] = math.nan
1094
+
1095
+ # Difficult complexity ASR for this risk
1096
+ difficult_risk_df = results_df[risk_mask & difficult_mask]
1097
+ if not difficult_risk_df.empty:
1098
+ try:
1099
+ joint_risk_dict["difficult_complexity_asr"] = round(list_mean_nan_safe(difficult_risk_df["attack_success"].tolist()) * 100, 2) if "attack_success" in difficult_risk_df.columns else 0.0
1100
+ except EvaluationException:
1101
+ self.logger.debug(f"All values in difficult complexity attack success array for {risk_key} were None or NaN, setting ASR to NaN")
1102
+ joint_risk_dict["difficult_complexity_asr"] = math.nan
1103
+
1104
+ joint_risk_attack_summary.append(joint_risk_dict)
1105
+
1106
+ # Calculate detailed joint risk attack ASR
1107
+ detailed_joint_risk_attack_asr = {}
1108
+ unique_complexities = sorted([c for c in results_df["complexity_level"].unique() if c != "baseline"])
1109
+
1110
+ for complexity in unique_complexities:
1111
+ complexity_mask = results_df["complexity_level"] == complexity
1112
+ if results_df[complexity_mask].empty:
1113
+ continue
1114
+
1115
+ detailed_joint_risk_attack_asr[complexity] = {}
1116
+
1117
+ for risk in unique_risks:
1118
+ risk_key = risk.replace("-", "_")
1119
+ risk_mask = results_df["risk_category"] == risk
1120
+ detailed_joint_risk_attack_asr[complexity][risk_key] = {}
1121
+
1122
+ # Group by converter within this complexity and risk
1123
+ complexity_risk_df = results_df[complexity_mask & risk_mask]
1124
+ if complexity_risk_df.empty:
1125
+ continue
1126
+
1127
+ converter_groups = complexity_risk_df.groupby("converter")
1128
+ for converter_name, converter_group in converter_groups:
1129
+ try:
1130
+ asr_value = round(list_mean_nan_safe(converter_group["attack_success"].tolist()) * 100, 2) if "attack_success" in converter_group.columns else 0.0
1131
+ except EvaluationException:
1132
+ self.logger.debug(f"All values in attack success array for {converter_name} in {complexity}/{risk_key} were None or NaN, setting ASR to NaN")
1133
+ asr_value = math.nan
1134
+ detailed_joint_risk_attack_asr[complexity][risk_key][f"{converter_name}_ASR"] = asr_value
1135
+
1136
+ # Compile the scorecard
1137
+ scorecard = {
1138
+ "risk_category_summary": [risk_category_summary],
1139
+ "attack_technique_summary": attack_technique_summary,
1140
+ "joint_risk_attack_summary": joint_risk_attack_summary,
1141
+ "detailed_joint_risk_attack_asr": detailed_joint_risk_attack_asr
1142
+ }
1143
+
1144
+ # Create redteaming parameters
1145
+ redteaming_parameters = {
1146
+ "attack_objective_generated_from": {
1147
+ "application_scenario": self.application_scenario,
1148
+ "risk_categories": [risk.value for risk in self.risk_categories],
1149
+ "custom_attack_seed_prompts": "",
1150
+ "policy_document": ""
1151
+ },
1152
+ "attack_complexity": [c.capitalize() for c in unique_complexities],
1153
+ "techniques_used": {}
1154
+ }
1155
+
1156
+ # Populate techniques used by complexity level
1157
+ for complexity in unique_complexities:
1158
+ complexity_mask = results_df["complexity_level"] == complexity
1159
+ complexity_df = results_df[complexity_mask]
1160
+ if not complexity_df.empty:
1161
+ complexity_converters = complexity_df["converter"].unique().tolist()
1162
+ redteaming_parameters["techniques_used"][complexity] = complexity_converters
1163
+
1164
+ self.logger.info("RedTeamResult creation completed")
1165
+
1166
+ # Create the final result
1167
+ red_team_result = ScanResult(
1168
+ scorecard=cast(RedTeamingScorecard, scorecard),
1169
+ parameters=cast(RedTeamingParameters, redteaming_parameters),
1170
+ attack_details=conversations,
1171
+ studio_url=self.ai_studio_url or None
1172
+ )
1173
+
1174
+ return red_team_result
1175
+
1176
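For readers tracing the scorecard math above: each ASR figure is simply the NaN-safe mean of the per-conversation `attack_success` flags, scaled to a percentage and rounded to two decimals. Below is a minimal standalone sketch of that aggregation, using numpy as a stand-in for the internal `list_mean_nan_safe` and `is_none_or_nan` helpers; the sample values are invented for illustration.

```python
import math

import numpy as np
import pandas as pd

# Toy results frame mirroring the columns built above; the values are invented.
df = pd.DataFrame({
    "risk_category": ["violence", "violence", "self_harm"],
    "complexity_level": ["baseline", "easy", "easy"],
    "attack_success": [0.0, 1.0, math.nan],  # NaN = no evaluation result available
})

def asr(values):
    """NaN-safe attack success rate in percent (stand-in for list_mean_nan_safe)."""
    clean = [v for v in values if not (isinstance(v, float) and math.isnan(v))]
    return round(float(np.mean(clean)) * 100, 2) if clean else math.nan

overall_asr = asr(df["attack_success"].tolist())
per_risk_asr = {risk: asr(group["attack_success"].tolist())
                for risk, group in df.groupby("risk_category")}
print(overall_asr)   # 50.0
print(per_risk_asr)  # {'self_harm': nan, 'violence': 50.0}
```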
+ # Delegates scorecard formatting to the shared utility
1177
+ def _to_scorecard(self, redteam_result: RedTeamResult) -> str:
1178
+ from ._utils.formatting_utils import format_scorecard
1179
+ return format_scorecard(redteam_result)
1180
+
1181
+ async def _evaluate(
1182
+ self,
1183
+ data_path: Union[str, os.PathLike],
1184
+ risk_category: RiskCategory,
1185
+ strategy: Union[AttackStrategy, List[AttackStrategy]],
1186
+ scan_name: Optional[str] = None,
1187
+ data_only: bool = False,
1188
+ output_path: Optional[Union[str, os.PathLike]] = None
1189
+ ) -> None:
1190
+ """Call the evaluate method if not data_only.
1191
+
1192
+ :param scan_name: Optional name for the evaluation.
1193
+ :type scan_name: Optional[str]
1194
+ :param data_only: Whether to return only data paths instead of evaluation results.
1195
+ :type data_only: bool
1196
+ :param data_path: Path to the input data.
1197
+ :type data_path: Optional[Union[str, os.PathLike]]
1198
+ :param output_path: Path for output results.
1199
+ :type output_path: Optional[Union[str, os.PathLike]]
1200
+ :return: Evaluation results or data paths.
1201
+ :rtype: Union[Dict[str, EvaluationResult], Dict[str, List[str]]]
1202
+ """
1203
+ strategy_name = self._get_strategy_name(strategy)
1204
+ self.logger.debug(f"Evaluate called with data_path={data_path}, risk_category={risk_category.value}, strategy={strategy_name}, output_path={output_path}, data_only={data_only}, scan_name={scan_name}")
1205
+ if data_only:
1206
+ return None
1207
+
1208
+ # If output_path is provided, use it; otherwise create one in the scan output directory if available
1209
+ if output_path:
1210
+ result_path = output_path
1211
+ elif hasattr(self, 'scan_output_dir') and self.scan_output_dir:
1212
+ result_filename = f"{strategy_name}_{risk_category.value}_{str(uuid.uuid4())}{RESULTS_EXT}"
1213
+ result_path = os.path.join(self.scan_output_dir, result_filename)
1214
+ else:
1215
+ result_path = f"{str(uuid.uuid4())}{RESULTS_EXT}"
1216
+
1217
+ evaluators_dict = {
1218
+ risk_category.value: RISK_CATEGORY_EVALUATOR_MAP[risk_category](azure_ai_project=self.azure_ai_project, credential=self.credential)
1219
+ }
1220
+
1221
+ # Completely suppress all output during evaluation call
1222
+ import sys
1224
+ import logging
1225
+ # Don't re-import os as it's already imported at the module level
1226
+
1227
+ # Create a DevNull class to completely discard all writes
1228
+ class DevNull:
1229
+ def write(self, msg):
1230
+ pass
1231
+ def flush(self):
1232
+ pass
1233
+
1234
+ # Store original stdout, stderr and logger settings
1235
+ original_stdout = sys.stdout
1236
+ original_stderr = sys.stderr
1237
+
1238
+ # Get all relevant loggers
1239
+ root_logger = logging.getLogger()
1240
+ promptflow_logger = logging.getLogger('promptflow')
1241
+ azure_logger = logging.getLogger('azure')
1242
+
1243
+ # Store original levels
1244
+ orig_root_level = root_logger.level
1245
+ orig_promptflow_level = promptflow_logger.level
1246
+ orig_azure_level = azure_logger.level
1247
+
1248
+ # Setup a completely silent logger filter
1249
+ class SilentFilter(logging.Filter):
1250
+ def filter(self, record):
1251
+ return False
1252
+
1253
+ # Get original filters to restore later
1254
+ orig_handlers = []
1255
+ for handler in root_logger.handlers:
1256
+ orig_handlers.append((handler, handler.filters.copy(), handler.level))
1257
+
1258
+ try:
1259
+ # Redirect all stdout/stderr output to DevNull to completely suppress it
1260
+ sys.stdout = DevNull()
1261
+ sys.stderr = DevNull()
1262
+
1263
+ # Set all loggers to CRITICAL level to suppress most log messages
1264
+ root_logger.setLevel(logging.CRITICAL)
1265
+ promptflow_logger.setLevel(logging.CRITICAL)
1266
+ azure_logger.setLevel(logging.CRITICAL)
1267
+
1268
+ # Add silent filter to all handlers
1269
+ silent_filter = SilentFilter()
1270
+ for handler in root_logger.handlers:
1271
+ handler.addFilter(silent_filter)
1272
+ handler.setLevel(logging.CRITICAL)
1273
+
1274
+ # Create a file handler for any logs we actually want to keep
1275
+ file_log_path = os.path.join(self.scan_output_dir, "redteam.log")
1276
+ file_handler = logging.FileHandler(file_log_path, mode='a')
1277
+ file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
1278
+
1279
+ # Allow file handler to capture DEBUG logs
1280
+ file_handler.setLevel(logging.DEBUG)
1281
+
1282
+ # Setup our own minimal logger for critical events
1283
+ eval_logger = logging.getLogger('redteam_evaluation')
1284
+ eval_logger.propagate = False # Don't pass to root logger
1285
+ eval_logger.setLevel(logging.DEBUG)
1286
+ eval_logger.addHandler(file_handler)
1287
+
1288
+ # Run evaluation silently
1289
+ eval_logger.debug(f"Starting evaluation for {risk_category.value}/{strategy_name}")
1290
+ evaluate_outputs = evaluate(
1291
+ data=data_path,
1292
+ evaluators=evaluators_dict,
1293
+ output_path=result_path,
1294
+ )
1295
+ eval_logger.debug(f"Completed evaluation for {risk_category.value}/{strategy_name}")
1296
+ finally:
1297
+ # Restore original stdout and stderr
1298
+ sys.stdout = original_stdout
1299
+ sys.stderr = original_stderr
1300
+
1301
+ # Restore original log levels
1302
+ root_logger.setLevel(orig_root_level)
1303
+ promptflow_logger.setLevel(orig_promptflow_level)
1304
+ azure_logger.setLevel(orig_azure_level)
1305
+
1306
+ # Restore original handlers and filters
1307
+ for handler, filters, level in orig_handlers:
1308
+ # Remove any filters we added
1309
+ for existing_filter in list(handler.filters):
+ handler.removeFilter(existing_filter)
+
+ # Restore original filters
+ for original_filter in filters:
+ handler.addFilter(original_filter)
1315
+
1316
+ # Restore original level
1317
+ handler.setLevel(level)
1318
+
1319
+ # Clean up our custom logger
1320
+ try:
1321
+ if 'eval_logger' in locals() and 'file_handler' in locals():
1322
+ eval_logger.removeHandler(file_handler)
1323
+ file_handler.close()
1324
+ except Exception as e:
1325
+ self.logger.warning(f"Failed to clean up logger: {str(e)}")
1326
+ self.red_team_info[strategy_name][risk_category.value]["evaluation_result_file"] = str(result_path)
+ self.red_team_info[strategy_name][risk_category.value]["evaluation_result"] = evaluate_outputs
+ self.red_team_info[strategy_name][risk_category.value]["status"] = TASK_STATUS["COMPLETED"]
1329
+ self.logger.debug(f"Evaluation complete for {strategy_name}/{risk_category.value}, results stored in red_team_info")
1330
+
1331
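The `_evaluate` method above silences console and logger noise around the `evaluate()` call by swapping `sys.stdout`/`sys.stderr` for a no-op writer and raising logger levels, restoring everything in `finally`. Below is a compact sketch of the same idea using the standard library's `contextlib` redirectors; it is an alternative formulation for illustration, not the SDK's implementation.

```python
import contextlib
import io
import logging

def run_silently(fn, *args, **kwargs):
    """Run fn with stdout/stderr discarded and noisy loggers raised to CRITICAL."""
    noisy = [logging.getLogger(), logging.getLogger("promptflow"), logging.getLogger("azure")]
    original_levels = [logger.level for logger in noisy]
    sink = io.StringIO()  # written output is simply discarded afterwards
    try:
        for logger in noisy:
            logger.setLevel(logging.CRITICAL)
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            return fn(*args, **kwargs)
    finally:
        for logger, level in zip(noisy, original_levels):
            logger.setLevel(level)

# Nothing is printed by the wrapped call, but the return value still comes back.
value = run_silently(lambda: print("hidden") or 42)
print(value)  # 42
```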
+ async def _process_attack(
1332
+ self,
1333
+ target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
1334
+ call_orchestrator: Callable,
1335
+ strategy: Union[AttackStrategy, List[AttackStrategy]],
1336
+ risk_category: RiskCategory,
1337
+ all_prompts: List[str],
1338
+ progress_bar: tqdm,
1339
+ progress_bar_lock: asyncio.Lock,
1340
+ scan_name: Optional[str] = None,
1341
+ data_only: bool = False,
1342
+ output_path: Optional[Union[str, os.PathLike]] = None,
1343
+ timeout: int = 120,
1344
+ ) -> Optional[EvaluationResult]:
1345
+ """Process a red team scan with the given orchestrator, converter, and prompts.
1346
+
1347
+ :param target: The target model or function to scan
1348
+ :param call_orchestrator: Function to call to create an orchestrator
1349
+ :param strategy: The attack strategy to use
1350
+ :param risk_category: The risk category to evaluate
1351
+ :param all_prompts: List of prompts to use for the scan
1352
+ :param progress_bar: Progress bar to update
1353
+ :param progress_bar_lock: Lock for the progress bar
1354
+ :param scan_name: Optional name for the evaluation
1355
+ :param data_only: Whether to return only data without evaluation
1356
+ :param output_path: Optional path for output
1357
+ :param timeout: The timeout in seconds for API calls
+ :return: None; results and statuses are tracked in self.red_team_info
+ """
1359
+ strategy_name = self._get_strategy_name(strategy)
1360
+ task_key = f"{strategy_name}_{risk_category.value}_attack"
1361
+ self.task_statuses[task_key] = TASK_STATUS["RUNNING"]
1362
+
1363
+ try:
1364
+ start_time = time.time()
1365
+ print(f"▶️ Starting task: {strategy_name} strategy for {risk_category.value} risk category")
1366
+ log_strategy_start(self.logger, strategy_name, risk_category.value)
1367
+
1368
+ converter = self._get_converter_for_strategy(strategy)
1369
+ try:
1370
+ self.logger.debug(f"Calling orchestrator for {strategy_name} strategy")
1371
+ orchestrator = await call_orchestrator(self.chat_target, all_prompts, converter, strategy_name, risk_category.value, timeout)
1372
+ except PyritException as e:
1373
+ log_error(self.logger, f"Error calling orchestrator for {strategy_name} strategy", e)
1374
+ self.logger.debug(f"Orchestrator error for {strategy_name}/{risk_category.value}: {str(e)}")
1375
+ self.task_statuses[task_key] = TASK_STATUS["FAILED"]
1376
+ self.failed_tasks += 1
1377
+
1378
+ async with progress_bar_lock:
1379
+ progress_bar.update(1)
1380
+ return None
1381
+
1382
+ data_path = self._write_pyrit_outputs_to_file(orchestrator)
1383
+
1384
+ # Store data file in our tracking dictionary
1385
+ self.red_team_info[strategy_name][risk_category.value]["data_file"] = data_path
1386
+ self.logger.debug(f"Updated red_team_info with data file: {strategy_name} -> {risk_category.value} -> {data_path}")
1387
+
1388
+ try:
1389
+ await self._evaluate(
1390
+ scan_name=scan_name,
1391
+ risk_category=risk_category,
1392
+ strategy=strategy,
1393
+ data_only=data_only,
1394
+ data_path=data_path,
1395
+ output_path=output_path,
1396
+ )
1397
+ except Exception as e:
1398
+ log_error(self.logger, f"Error during evaluation for {strategy_name}/{risk_category.value}", e)
1399
+ print(f"⚠️ Evaluation error for {strategy_name}/{risk_category.value}: {str(e)}")
1400
+ self.red_team_info[strategy_name][risk_category.value]["status"] = TASK_STATUS["FAILED"]
1401
+ # Continue processing even if evaluation fails
1402
+
1403
+ async with progress_bar_lock:
1404
+ self.completed_tasks += 1
1405
+ progress_bar.update(1)
1406
+ completion_pct = (self.completed_tasks / self.total_tasks) * 100
1407
+ elapsed_time = time.time() - start_time
1408
+
1409
+ # Calculate estimated remaining time
1410
+ if self.start_time:
1411
+ total_elapsed = time.time() - self.start_time
1412
+ avg_time_per_task = total_elapsed / self.completed_tasks if self.completed_tasks > 0 else 0
1413
+ remaining_tasks = self.total_tasks - self.completed_tasks
1414
+ est_remaining_time = avg_time_per_task * remaining_tasks if avg_time_per_task > 0 else 0
1415
+
1416
+ # Print task completion message and estimated time on separate lines
1417
+ # This ensures they don't get concatenated with tqdm output
1418
+ print("") # Empty line to separate from progress bar
1419
+ print(f"✅ Completed task {self.completed_tasks}/{self.total_tasks} ({completion_pct:.1f}%) - {strategy_name}/{risk_category.value} in {elapsed_time:.1f}s")
1420
+ print(f" Est. remaining: {est_remaining_time/60:.1f} minutes")
1421
+ else:
1422
+ print("") # Empty line to separate from progress bar
1423
+ print(f"✅ Completed task {self.completed_tasks}/{self.total_tasks} ({completion_pct:.1f}%) - {strategy_name}/{risk_category.value} in {elapsed_time:.1f}s")
1424
+
1425
+ log_strategy_completion(self.logger, strategy_name, risk_category.value, elapsed_time)
1426
+ self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
1427
+
1428
+ except Exception as e:
1429
+ log_error(self.logger, f"Unexpected error processing {strategy_name} strategy for {risk_category.value}", e)
1430
+ self.logger.debug(f"Critical error in task {strategy_name}/{risk_category.value}: {str(e)}")
1431
+ self.task_statuses[task_key] = TASK_STATUS["FAILED"]
1432
+ self.failed_tasks += 1
1433
+
1434
+ async with progress_bar_lock:
1435
+ progress_bar.update(1)
1436
+
1437
+ return None
1438
+
1439
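The `scan` method below runs these `_process_attack` coroutines in fixed-size batches, wrapping each batch in `asyncio.wait_for` so a stalled batch cannot hang the whole scan. A minimal standalone sketch of that batching pattern follows; the task names and delays are invented for illustration.

```python
import asyncio

async def fake_attack_task(name: str, delay: float) -> str:
    # Stand-in for a single _process_attack coroutine.
    await asyncio.sleep(delay)
    return name

async def run_in_batches(tasks, max_parallel: int = 5, timeout: float = 120.0):
    results = []
    for i in range(0, len(tasks), max_parallel):
        batch = tasks[i:i + max_parallel]
        try:
            # The scan loop doubles the per-task timeout for a whole batch.
            results.extend(await asyncio.wait_for(asyncio.gather(*batch), timeout=timeout * 2))
        except asyncio.TimeoutError:
            print(f"batch {i // max_parallel + 1} timed out, continuing with next batch")
    return results

async def main() -> None:
    tasks = [fake_attack_task(f"task-{n}", 0.01) for n in range(12)]
    done = await run_in_batches(tasks, max_parallel=5, timeout=1.0)
    print(f"{len(done)} tasks finished")

asyncio.run(main())
```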
+ async def scan(
1440
+ self,
1441
+ target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget],
1442
+ *,
1443
+ scan_name: Optional[str] = None,
1444
+ num_turns: int = 1,
1445
+ attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]] = [],
1446
+ data_only: bool = False,
1447
+ output_path: Optional[Union[str, os.PathLike]] = None,
1448
+ application_scenario: Optional[str] = None,
1449
+ parallel_execution: bool = True,
1450
+ max_parallel_tasks: int = 5,
1451
+ timeout: int = 120
1452
+ ) -> RedTeamResult:
1453
+ """Run a red team scan against the target using the specified strategies.
1454
+
1455
+ :param target: The target model or function to scan
1456
+ :type target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget]
1457
+ :param scan_name: Optional name for the evaluation
1458
+ :type scan_name: Optional[str]
1459
+ :param num_turns: Number of conversation turns to use in the scan
1460
+ :type num_turns: int
1461
+ :param attack_strategies: List of attack strategies to use
1462
+ :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
1463
+ :param data_only: Whether to return only data without evaluation
1464
+ :type data_only: bool
1465
+ :param output_path: Optional path for output
1466
+ :type output_path: Optional[Union[str, os.PathLike]]
1467
+ :param application_scenario: Optional description of the application scenario
1468
+ :type application_scenario: Optional[str]
1469
+ :param parallel_execution: Whether to execute orchestrator tasks in parallel
1470
+ :type parallel_execution: bool
1471
+ :param max_parallel_tasks: Maximum number of parallel orchestrator tasks to run (default: 5)
1472
+ :type max_parallel_tasks: int
1473
+ :param timeout: The timeout in seconds for API calls (default: 120)
1474
+ :type timeout: int
1475
+ :return: The output from the red team scan
1476
+ :rtype: RedTeamResult
1477
+ """
1478
+ # Start timing for performance tracking
1479
+ self.start_time = time.time()
1480
+
1481
+ # Reset task counters and statuses
1482
+ self.task_statuses = {}
1483
+ self.completed_tasks = 0
1484
+ self.failed_tasks = 0
1485
+
1486
+ # Generate a unique scan ID for this run
1487
+ self.scan_id = f"scan_{scan_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" if scan_name else f"scan_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
1488
+ self.scan_id = self.scan_id.replace(" ", "_")
1489
+
1490
+ # Create output directory for this scan
1491
+ # If DEBUG environment variable is set, use a regular folder name; otherwise, use a hidden folder
1492
+ is_debug = os.environ.get("DEBUG", "").lower() in ("true", "1", "yes", "y")
1493
+ folder_prefix = "" if is_debug else "."
1494
+ self.scan_output_dir = os.path.join(self.output_dir or ".", f"{folder_prefix}{self.scan_id}")
1495
+ os.makedirs(self.scan_output_dir, exist_ok=True)
1496
+
1497
+ # Re-initialize logger with the scan output directory
1498
+ self.logger = setup_logger(output_dir=self.scan_output_dir)
1499
+
1500
+ # Set up logging filter to suppress various logs we don't want in the console
1501
+ class LogFilter(logging.Filter):
1502
+ def filter(self, record):
1503
+ # Filter out promptflow logs and evaluation warnings about artifacts
1504
+ if record.name.startswith('promptflow'):
1505
+ return False
1506
+ if 'The path to the artifact is either not a directory or does not exist' in record.getMessage():
1507
+ return False
1508
+ if 'RedTeamOutput object at' in record.getMessage():
1509
+ return False
1510
+ if 'timeout won\'t take effect' in record.getMessage():
1511
+ return False
1512
+ if 'Submitting run' in record.getMessage():
1513
+ return False
1514
+ return True
1515
+
1516
+ # Apply filter to root logger to suppress unwanted logs
1517
+ root_logger = logging.getLogger()
1518
+ log_filter = LogFilter()
1519
+
1520
+ # Remove existing filters first to avoid duplication
1521
+ for handler in root_logger.handlers:
1522
+ for existing_filter in list(handler.filters):
+ handler.removeFilter(existing_filter)
1524
+ handler.addFilter(log_filter)
1525
+
1526
+ # Also set up stderr logger to use the same filter
1527
+ stderr_logger = logging.getLogger('stderr')
1528
+ for handler in stderr_logger.handlers:
1529
+ handler.addFilter(log_filter)
1530
+
1531
+ log_section_header(self.logger, "Starting red team scan")
1532
+ self.logger.info(f"Scan started with scan_name: {scan_name}")
1533
+ self.logger.info(f"Scan ID: {self.scan_id}")
1534
+ self.logger.info(f"Scan output directory: {self.scan_output_dir}")
1535
+ self.logger.debug(f"Attack strategies: {attack_strategies}")
1536
+ self.logger.debug(f"data_only: {data_only}, output_path: {output_path}")
1537
+ self.logger.debug(f"Timeout: {timeout} seconds")
1538
+
1539
+ # Clear, minimal output for start of scan
1540
+ print(f"🚀 STARTING RED TEAM SCAN: {scan_name}")
1541
+ print(f"📂 Output directory: {self.scan_output_dir}")
1542
+ self.logger.info(f"Starting RED TEAM SCAN: {scan_name}")
1543
+ self.logger.info(f"Output directory: {self.scan_output_dir}")
1544
+
1545
+ chat_target = self._get_chat_target(target)
1546
+ self.chat_target = chat_target
1547
+ self.application_scenario = application_scenario or ""
1548
+
1549
+ if not self.attack_objective_generator:
1550
+ error_msg = "Attack objective generator is required for red team agent."
1551
+ log_error(self.logger, error_msg)
1552
+ self.logger.debug(f"{error_msg}")
1553
+ raise EvaluationException(
1554
+ message=error_msg,
1555
+ internal_message="Attack objective generator is not provided.",
1556
+ target=ErrorTarget.RED_TEAM,
1557
+ category=ErrorCategory.MISSING_FIELD,
1558
+ blame=ErrorBlame.USER_ERROR
1559
+ )
1560
+
1561
+ # If risk categories aren't specified, use all available categories
1562
+ if not self.attack_objective_generator.risk_categories:
1563
+ self.logger.info("No risk categories specified, using all available categories")
1564
+ self.attack_objective_generator.risk_categories = list(RiskCategory)
1565
+
1566
+ self.risk_categories = self.attack_objective_generator.risk_categories
1567
+ # Show risk categories to user
1568
+ print(f"📊 Risk categories: {[rc.value for rc in self.risk_categories]}")
1569
+ self.logger.info(f"Risk categories to process: {[rc.value for rc in self.risk_categories]}")
1570
+
1571
+ # Prepend AttackStrategy.Baseline without mutating the caller's list (or the mutable default)
+ if AttackStrategy.Baseline not in attack_strategies:
+ attack_strategies = [AttackStrategy.Baseline, *attack_strategies]
+ self.logger.debug("Added Baseline to attack strategies")
1575
+
1576
+ # When using custom attack objectives, check for incompatible strategies
1577
+ using_custom_objectives = self.attack_objective_generator and self.attack_objective_generator.custom_attack_seed_prompts
1578
+ if using_custom_objectives:
1579
+ # Maintain a list of converters to avoid duplicates
1580
+ used_converter_types = set()
1581
+ strategies_to_remove = []
1582
+
1583
+ for i, strategy in enumerate(attack_strategies):
1584
+ if isinstance(strategy, list):
1585
+ # Skip composite strategies for now
1586
+ continue
1587
+
1588
+ if strategy == AttackStrategy.Jailbreak:
1589
+ self.logger.warning("Jailbreak strategy with custom attack objectives may not work as expected. The strategy will be run, but results may vary.")
1590
+ print("⚠️ Warning: Jailbreak strategy with custom attack objectives may not work as expected.")
1591
+
1592
+ if strategy == AttackStrategy.Tense:
1593
+ self.logger.warning("Tense strategy requires specific formatting in objectives and may not work correctly with custom attack objectives.")
1594
+ print("⚠️ Warning: Tense strategy requires specific formatting in objectives and may not work correctly with custom attack objectives.")
1595
+
1596
+ # Check for redundant converters
1597
+ # TODO: should this be in flattening logic?
1598
+ converter = self._get_converter_for_strategy(strategy)
1599
+ if converter is not None:
1600
+ converter_type = type(converter).__name__ if not isinstance(converter, list) else ','.join([type(c).__name__ for c in converter])
1601
+
1602
+ if converter_type in used_converter_types and strategy != AttackStrategy.Baseline:
1603
+ self.logger.warning(f"Strategy {strategy.name} uses a converter type that has already been used. Skipping redundant strategy.")
1604
+ print(f"ℹ️ Skipping redundant strategy: {strategy.name} (uses same converter as another strategy)")
1605
+ strategies_to_remove.append(strategy)
1606
+ else:
1607
+ used_converter_types.add(converter_type)
1608
+
1609
+ # Remove redundant strategies
1610
+ if strategies_to_remove:
1611
+ attack_strategies = [s for s in attack_strategies if s not in strategies_to_remove]
1612
+ self.logger.info(f"Removed {len(strategies_to_remove)} redundant strategies: {[s.name for s in strategies_to_remove]}")
1613
+
1614
+ with self._start_redteam_mlflow_run(self.azure_ai_project, scan_name) as eval_run:
1615
+ self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination, evaluation_id=eval_run.info.run_id)
1616
+
1617
+ # Show URL for tracking progress
1618
+ print(f"🔗 Track your red team scan in AI Foundry: {self.ai_studio_url}")
1619
+ self.logger.info(f"Started MLFlow run: {self.ai_studio_url}")
1620
+
1621
+ log_subsection_header(self.logger, "Setting up scan configuration")
1622
+ flattened_attack_strategies = self._get_flattened_attack_strategies(attack_strategies)
1623
+ self.logger.info(f"Using {len(flattened_attack_strategies)} attack strategies")
1624
+ self.logger.info(f"Found {len(flattened_attack_strategies)} attack strategies")
1625
+
1626
+ orchestrators = self._get_orchestrators_for_attack_strategies(attack_strategies)
1627
+ self.logger.debug(f"Selected {len(orchestrators)} orchestrators for attack strategies")
1628
+
1629
+ # Calculate total tasks: #risk_categories * #strategies * #orchestrators
1630
+ self.total_tasks = len(self.risk_categories) * len(flattened_attack_strategies) * len(orchestrators)
1631
+ # Show task count for user awareness
1632
+ print(f"📋 Planning {self.total_tasks} total tasks")
1633
+ self.logger.info(f"Total tasks: {self.total_tasks} ({len(self.risk_categories)} risk categories * {len(flattened_attack_strategies)} strategies * {len(orchestrators)} orchestrators)")
1634
+
1635
+ # Initialize our tracking dictionary early with empty structures
1636
+ # This ensures we have a place to store results even if tasks fail
1637
+ self.red_team_info = {}
1638
+ for strategy in flattened_attack_strategies:
1639
+ strategy_name = self._get_strategy_name(strategy)
1640
+ self.red_team_info[strategy_name] = {}
1641
+ for risk_category in self.risk_categories:
1642
+ self.red_team_info[strategy_name][risk_category.value] = {
1643
+ "data_file": "",
1644
+ "evaluation_result_file": "",
1645
+ "evaluation_result": None,
1646
+ "status": TASK_STATUS["PENDING"]
1647
+ }
1648
+
1649
+ self.logger.debug(f"Initialized tracking dictionary with {len(self.red_team_info)} strategies")
1650
+
1651
+ # More visible progress bar with additional status
1652
+ progress_bar = tqdm(
1653
+ total=self.total_tasks,
1654
+ desc="Scanning: ",
1655
+ ncols=100,
1656
+ unit="scan",
1657
+ bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]"
1658
+ )
1659
+ progress_bar.set_postfix({"current": "initializing"})
1660
+ progress_bar_lock = asyncio.Lock()
1661
+
1662
+ # Process all API calls sequentially to respect dependencies between objectives
1663
+ log_section_header(self.logger, "Fetching attack objectives")
1664
+
1665
+ # Log the objective source mode
1666
+ if using_custom_objectives:
1667
+ self.logger.info(f"Using custom attack objectives from {self.attack_objective_generator.custom_attack_seed_prompts}")
1668
+ print(f"📚 Using custom attack objectives from {self.attack_objective_generator.custom_attack_seed_prompts}")
1669
+ else:
1670
+ self.logger.info("Using attack objectives from Azure RAI service")
1671
+ print("📚 Using attack objectives from Azure RAI service")
1672
+
1673
+ # Dictionary to store all objectives
1674
+ all_objectives = {}
1675
+
1676
+ # First fetch baseline objectives for all risk categories
1677
+ # This is important as other strategies depend on baseline objectives
1678
+ self.logger.info("Fetching baseline objectives for all risk categories")
1679
+ for risk_category in self.risk_categories:
1680
+ progress_bar.set_postfix({"current": f"fetching baseline/{risk_category.value}"})
1681
+ self.logger.debug(f"Fetching baseline objectives for {risk_category.value}")
1682
+ baseline_objectives = await self._get_attack_objectives(
1683
+ risk_category=risk_category,
1684
+ application_scenario=application_scenario,
1685
+ strategy="baseline"
1686
+ )
1687
+ if "baseline" not in all_objectives:
1688
+ all_objectives["baseline"] = {}
1689
+ all_objectives["baseline"][risk_category.value] = baseline_objectives
1690
+ print(f"📝 Fetched baseline objectives for {risk_category.value}: {len(baseline_objectives)} objectives")
1691
+
1692
+ # Then fetch objectives for other strategies
1693
+ self.logger.info("Fetching objectives for non-baseline strategies")
1694
+ strategy_count = len(flattened_attack_strategies)
1695
+ for i, strategy in enumerate(flattened_attack_strategies):
1696
+ strategy_name = self._get_strategy_name(strategy)
1697
+ if strategy_name == "baseline":
1698
+ continue # Already fetched
1699
+
1700
+ print(f"🔄 Fetching objectives for strategy {i+1}/{strategy_count}: {strategy_name}")
1701
+ all_objectives[strategy_name] = {}
1702
+
1703
+ for risk_category in self.risk_categories:
1704
+ progress_bar.set_postfix({"current": f"fetching {strategy_name}/{risk_category.value}"})
1705
+ self.logger.debug(f"Fetching objectives for {strategy_name} strategy and {risk_category.value} risk category")
1706
+ objectives = await self._get_attack_objectives(
1707
+ risk_category=risk_category,
1708
+ application_scenario=application_scenario,
1709
+ strategy=strategy_name
1710
+ )
1711
+ all_objectives[strategy_name][risk_category.value] = objectives
1712
+
1713
+
1714
+ self.logger.info("Completed fetching all attack objectives")
1715
+
1716
+ log_section_header(self.logger, "Starting orchestrator processing")
1717
+ # Removed console output
1718
+
1719
+ # Create all tasks for parallel processing
1720
+ orchestrator_tasks = []
1721
+ combinations = list(itertools.product(orchestrators, flattened_attack_strategies, self.risk_categories))
1722
+
1723
+ for combo_idx, (call_orchestrator, strategy, risk_category) in enumerate(combinations):
1724
+ strategy_name = self._get_strategy_name(strategy)
1725
+ objectives = all_objectives[strategy_name][risk_category.value]
1726
+
1727
+ if not objectives:
1728
+ self.logger.warning(f"No objectives found for {strategy_name}+{risk_category.value}, skipping")
1729
+ print(f"⚠️ No objectives found for {strategy_name}/{risk_category.value}, skipping")
1730
+ self.red_team_info[strategy_name][risk_category.value]["status"] = TASK_STATUS["COMPLETED"]
1731
+ async with progress_bar_lock:
1732
+ progress_bar.update(1)
1733
+ continue
1734
+
1735
+ self.logger.debug(f"[{combo_idx+1}/{len(combinations)}] Creating task: {call_orchestrator.__name__} + {strategy_name} + {risk_category.value}")
1736
+
1737
+ orchestrator_tasks.append(
1738
+ self._process_attack(
1739
+ target=target,
1740
+ call_orchestrator=call_orchestrator,
1741
+ all_prompts=objectives,
1742
+ strategy=strategy,
1743
+ progress_bar=progress_bar,
1744
+ progress_bar_lock=progress_bar_lock,
1745
+ scan_name=scan_name,
1746
+ data_only=data_only,
1747
+ output_path=output_path,
1748
+ risk_category=risk_category,
1749
+ timeout=timeout
1750
+ )
1751
+ )
1752
+
1753
+ # Process tasks in parallel with optimized batching
1754
+ if parallel_execution and orchestrator_tasks:
1755
+ print(f"⚙️ Processing {len(orchestrator_tasks)} tasks in parallel (max {max_parallel_tasks} at a time)")
1756
+ self.logger.info(f"Processing {len(orchestrator_tasks)} tasks in parallel (max {max_parallel_tasks} at a time)")
1757
+
1758
+ # Create batches for processing
1759
+ for i in range(0, len(orchestrator_tasks), max_parallel_tasks):
1760
+ end_idx = min(i + max_parallel_tasks, len(orchestrator_tasks))
1761
+ batch = orchestrator_tasks[i:end_idx]
1762
+ progress_bar.set_postfix({"current": f"batch {i//max_parallel_tasks+1}/{math.ceil(len(orchestrator_tasks)/max_parallel_tasks)}"})
1763
+ self.logger.debug(f"Processing batch of {len(batch)} tasks (tasks {i+1} to {end_idx})")
1764
+
1765
+ try:
1766
+ # Add timeout to each batch
1767
+ await asyncio.wait_for(
1768
+ asyncio.gather(*batch),
1769
+ timeout=timeout * 2 # Double timeout for batches
1770
+ )
1771
+ except asyncio.TimeoutError:
1772
+ self.logger.warning(f"Batch {i//max_parallel_tasks+1} timed out after {timeout*2} seconds")
1773
+ print(f"⚠️ Batch {i//max_parallel_tasks+1} timed out, continuing with next batch")
1774
+ # Set task status to TIMEOUT
1775
+ batch_task_key = f"scan_batch_{i//max_parallel_tasks+1}"
1776
+ self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
1777
+ continue
1778
+ except Exception as e:
1779
+ log_error(self.logger, f"Error processing batch {i//max_parallel_tasks+1}", e)
1780
+ self.logger.debug(f"Error in batch {i//max_parallel_tasks+1}: {str(e)}")
1781
+ continue
1782
+ else:
1783
+ # Sequential execution
1784
+ self.logger.info("Running orchestrator processing sequentially")
1785
+ print("⚙️ Processing tasks sequentially")
1786
+ for i, task in enumerate(orchestrator_tasks):
1787
+ progress_bar.set_postfix({"current": f"task {i+1}/{len(orchestrator_tasks)}"})
1788
+ self.logger.debug(f"Processing task {i+1}/{len(orchestrator_tasks)}")
1789
+
1790
+ try:
1791
+ # Add timeout to each task
1792
+ await asyncio.wait_for(task, timeout=timeout)
1793
+ except asyncio.TimeoutError:
1794
+ self.logger.warning(f"Task {i+1}/{len(orchestrator_tasks)} timed out after {timeout} seconds")
1795
+ print(f"⚠️ Task {i+1} timed out, continuing with next task")
1796
+ # Set task status to TIMEOUT
1797
+ task_key = f"scan_task_{i+1}"
1798
+ self.task_statuses[task_key] = TASK_STATUS["TIMEOUT"]
1799
+ continue
1800
+ except Exception as e:
1801
+ log_error(self.logger, f"Error processing task {i+1}/{len(orchestrator_tasks)}", e)
1802
+ self.logger.debug(f"Error in task {i+1}: {str(e)}")
1803
+ continue
1804
+
1805
+ progress_bar.close()
1806
+
1807
+ # Print final status
1808
+ tasks_completed = sum(1 for status in self.task_statuses.values() if status == TASK_STATUS["COMPLETED"])
1809
+ tasks_failed = sum(1 for status in self.task_statuses.values() if status == TASK_STATUS["FAILED"])
1810
+ tasks_timeout = sum(1 for status in self.task_statuses.values() if status == TASK_STATUS["TIMEOUT"])
1811
+
1812
+ total_time = time.time() - self.start_time
1813
+ # Only log the summary to file, don't print to console
1814
+ self.logger.info(f"Scan Summary: Total tasks: {self.total_tasks}, Completed: {tasks_completed}, Failed: {tasks_failed}, Timeouts: {tasks_timeout}, Total time: {total_time/60:.1f} minutes")
1815
+
1816
+ # Process results
1817
+ log_section_header(self.logger, "Processing results")
1818
+
1819
+ # Convert results to RedTeamResult using only red_team_info
1820
+ red_team_result = self._to_red_team_result()
1821
+ scan_result = ScanResult(
1822
+ scorecard=red_team_result["scorecard"],
1823
+ parameters=red_team_result["parameters"],
1824
+ attack_details=red_team_result["attack_details"],
1825
+ studio_url=red_team_result["studio_url"],
1826
+ )
1827
+
1828
+ # Create output with either full results or just conversations
1829
+ if data_only:
1830
+ self.logger.info("Data-only mode, creating output with just conversations")
1831
+ output = RedTeamResult(scan_result=scan_result, attack_details=red_team_result["attack_details"])
1832
+ else:
1833
+ output = RedTeamResult(
1834
+ scan_result=scan_result,
1835
+ attack_details=red_team_result["attack_details"]
1836
+ )
1837
+
1838
+ # Log results to MLFlow
1839
+ self.logger.info("Logging results to MLFlow")
1840
+ await self._log_redteam_results_to_mlflow(
1841
+ redteam_output=output,
1842
+ eval_run=eval_run,
1843
+ data_only=data_only
1844
+ )
1845
+
1846
+ if data_only:
1847
+ self.logger.info("Data-only mode, returning results without evaluation")
1848
+ return output
1849
+
1850
+ if output_path and output.scan_result:
1851
+ # Ensure output_path is an absolute path
1852
+ abs_output_path = output_path if os.path.isabs(output_path) else os.path.abspath(output_path)
1853
+ self.logger.info(f"Writing output to {abs_output_path}")
1854
+ _write_output(abs_output_path, output.scan_result)
1855
+
1856
+ # Also save a copy to the scan output directory if available
1857
+ if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
1858
+ final_output = os.path.join(self.scan_output_dir, "final_results.json")
1859
+ _write_output(final_output, output.scan_result)
1860
+ self.logger.info(f"Also saved a copy to {final_output}")
1861
+ elif output.scan_result and hasattr(self, 'scan_output_dir') and self.scan_output_dir:
1862
+ # If no output_path was specified but we have scan_output_dir, save there
1863
+ final_output = os.path.join(self.scan_output_dir, "final_results.json")
1864
+ _write_output(final_output, output.scan_result)
1865
+ self.logger.info(f"Saved results to {final_output}")
1866
+
1867
+ if output.scan_result:
1868
+ self.logger.debug("Generating scorecard")
1869
+ scorecard = self._to_scorecard(output.scan_result)
1870
+ # Store scorecard in a variable for accessing later if needed
1871
+ self.scorecard = scorecard
1872
+
1873
+ # Print scorecard to console for user visibility (without extra header)
1874
+ print(scorecard)
1875
+
1876
+ # Print URL for detailed results (once only)
1877
+ studio_url = output.scan_result.get("studio_url", "")
1878
+ if studio_url:
1879
+ print(f"\nDetailed results available at:\n{studio_url}")
1880
+
1881
+ # Print the output directory path so the user can find it easily
1882
+ if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
1883
+ print(f"\n📂 All scan files saved to: {self.scan_output_dir}")
1884
+
1885
+ print(f"✅ Scan completed successfully!")
1886
+ self.logger.info("Scan completed successfully")
1887
+ return output
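Finally, a hedged usage sketch of the `scan` entry point. Only the keyword arguments passed to `scan()` come from the signature shown in this diff; the class name `RedTeam`, the import path, the constructor, and the target callable's signature are assumptions for illustration.

```python
import asyncio

# Assumption: class name and import path; construction details are not part of this
# diff, so the instance is left as a placeholder below.
from azure.ai.evaluation.red_team import RedTeam, AttackStrategy  # assumed import path

def my_target(query: str) -> str:
    # Hypothetical application under test; the expected callable signature is assumed.
    return "I can't help with that."

async def main() -> None:
    red_team: RedTeam = ...  # construct a RedTeam instance per the package docs (not shown here)
    result = await red_team.scan(
        target=my_target,
        scan_name="demo-scan",
        attack_strategies=[AttackStrategy.Jailbreak],  # Baseline is prepended automatically
        parallel_execution=True,
        max_parallel_tasks=5,
        timeout=120,
    )
    # RedTeamResult exposes the scan_result assembled at the end of scan() above.
    if result.scan_result:
        print(result.scan_result.get("studio_url", ""))

asyncio.run(main())
```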