azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +0 -16
- azure/ai/evaluation/_common/rai_service.py +1 -1
- azure/ai/evaluation/_common/utils.py +1 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +4 -4
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +84 -68
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
- azure/ai/evaluation/_evaluate/_utils.py +3 -3
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +1 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +1 -1
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +19 -0
- azure/ai/evaluation/{_red_team → red_team}/_attack_objective_generator.py +3 -0
- azure/ai/evaluation/{_red_team → red_team}/_attack_strategy.py +3 -0
- azure/ai/evaluation/{_red_team → red_team}/_red_team.py +96 -67
- azure/ai/evaluation/red_team/_red_team_result.py +382 -0
- azure/ai/evaluation/{_red_team → red_team}/_utils/constants.py +2 -1
- azure/ai/evaluation/{_red_team → red_team}/_utils/formatting_utils.py +23 -22
- azure/ai/evaluation/{_red_team → red_team}/_utils/logging_utils.py +1 -1
- azure/ai/evaluation/{_red_team → red_team}/_utils/strategy_utils.py +8 -4
- azure/ai/evaluation/simulator/_simulator.py +1 -1
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +13 -2
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/RECORD +50 -40
- azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
- azure/ai/evaluation/_red_team/_utils/__init__.py +0 -3
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- /azure/ai/evaluation/{_red_team → red_team}/_callback_chat_target.py +0 -0
- /azure/ai/evaluation/{_red_team → red_team}/_default_converter.py +0 -0
- /azure/ai/evaluation/{_red_team → red_team/_utils}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
@@ -43,7 +43,7 @@ from azure.ai.evaluation import evaluate
 from azure.core.credentials import TokenCredential
 
 # Red Teaming imports
-from ._red_team_result import
+from ._red_team_result import RedTeamResult, RedTeamingScorecard, RedTeamingParameters, ScanResult
 from ._attack_strategy import AttackStrategy
 from ._attack_objective_generator import RiskCategory, _AttackObjectiveGenerator
 
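Per the file list above, the red-team package moves from the private `azure/ai/evaluation/_red_team` path to a public `azure/ai/evaluation/red_team` path in 1.5.0. A minimal import sketch against the new layout (the exact re-exports of `red_team/__init__.py` are assumed, not confirmed by this diff):

```python
# Sketch only: assumes red_team/__init__.py re-exports these names in 1.5.0.
from azure.ai.evaluation.red_team import (
    RedTeam,
    AttackStrategy,
    RiskCategory,
)
```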
@@ -90,14 +90,17 @@ class RedTeam():
     :param max_parallel_tasks: Maximum number of parallel tasks to run when scanning (default: 5)
     :type max_parallel_tasks: int
     """
-    def __init__(
-
-
-
-
-
-
-
+    def __init__(
+        self,
+        azure_ai_project,
+        credential,
+        *,
+        risk_categories: Optional[List[RiskCategory]] = None,
+        num_objectives: int = 10,
+        application_scenario: Optional[str] = None,
+        custom_attack_seed_prompts: Optional[str] = None,
+        output_dir=None
+    ):
 
         self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
         self.credential = credential
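With the rewritten signature, everything after `credential` is keyword-only, so positional calls beyond the first two arguments now raise a `TypeError`. A hedged construction sketch (project values are placeholders; `RiskCategory.Violence` is assumed to be a valid member):

```python
from azure.identity import DefaultAzureCredential

# Placeholders only -- substitute a real Azure AI project and credential.
red_team = RedTeam(
    {
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    },
    DefaultAzureCredential(),
    risk_categories=[RiskCategory.Violence],  # keyword-only from 1.5.0 on
    num_objectives=10,
    output_dir="./redteam-outputs",
)
```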
@@ -201,7 +204,7 @@ class RedTeam():
 
     async def _log_redteam_results_to_mlflow(
         self,
-        redteam_output:
+        redteam_output: RedTeamResult,
         eval_run: EvalRun,
         data_only: bool = False,
     ) -> Optional[str]:
@@ -227,15 +230,28 @@
             with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
                 if data_only:
                     # In data_only mode, we write the conversations in conversation/messages format
-                    f.write(json.dumps({"conversations": redteam_output.
-                elif redteam_output.
-                    json.dump(redteam_output.
+                    f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+                elif redteam_output.scan_result:
+                    json.dump(redteam_output.scan_result, f)
+
+            eval_info_name = "redteam_info.json"
+            eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
+            self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
+            with open (eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                # Remove evaluation_result from red_team_info before logging
+                red_team_info_logged = {}
+                for strategy, harms_dict in self.red_team_info.items():
+                    red_team_info_logged[strategy] = {}
+                    for harm, info_dict in harms_dict.items():
+                        info_dict.pop("evaluation_result", None)
+                        red_team_info_logged[strategy][harm] = info_dict
+                f.write(json.dumps(red_team_info_logged))
 
             # Also save a human-readable scorecard if available
-            if not data_only and redteam_output.
+            if not data_only and redteam_output.scan_result:
                 scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
                 with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                    f.write(self._to_scorecard(redteam_output.
+                    f.write(self._to_scorecard(redteam_output.scan_result))
                 self.logger.debug(f"Saved scorecard to: {scorecard_path}")
 
             # Create a dedicated artifacts directory with proper structure for MLFlow
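One subtlety in the new `redteam_info.json` logging: `info_dict.pop("evaluation_result", None)` mutates the inner dictionaries of `self.red_team_info` while building the copy to log. A non-mutating sketch of the same filtering (hypothetical helper, not part of the package):

```python
# Hypothetical, side-effect-free equivalent of the filtering loop above.
def strip_evaluation_results(red_team_info: dict) -> dict:
    return {
        strategy: {
            harm: {k: v for k, v in info.items() if k != "evaluation_result"}
            for harm, info in harms.items()
        }
        for strategy, harms in red_team_info.items()
    }
```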
@@ -246,9 +262,13 @@
                 # First, create the main artifact file that MLFlow expects
                 with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
                     if data_only:
-                        f.write(json.dumps({"conversations": redteam_output.
-                    elif redteam_output.
-
+                        f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+                    elif redteam_output.scan_result:
+                        redteam_output.scan_result["redteaming_scorecard"] = redteam_output.scan_result.get("scorecard", None)
+                        redteam_output.scan_result["redteaming_parameters"] = redteam_output.scan_result.get("parameters", None)
+                        redteam_output.scan_result["redteaming_data"] = redteam_output.scan_result.get("attack_details", None)
+
+                        json.dump(redteam_output.scan_result, f)
 
                 # Copy all relevant files to the temp directory
                 import shutil
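The three `.get(...)` assignments above mirror the new `scorecard`/`parameters`/`attack_details` keys under legacy `redteaming_*` names before the artifact is serialized, so consumers of either key set keep working. A runnable sketch of the effect (placeholder values):

```python
import json

# Placeholder result; real values come from _to_red_team_result().
scan_result = {"scorecard": {}, "parameters": {}, "attack_details": [], "studio_url": None}
scan_result["redteaming_scorecard"] = scan_result.get("scorecard", None)
scan_result["redteaming_parameters"] = scan_result.get("parameters", None)
scan_result["redteaming_data"] = scan_result.get("attack_details", None)
print(json.dumps(scan_result))  # the logged artifact now carries both key sets
```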
@@ -260,6 +280,8 @@
                         continue
                     if file.endswith('.log') and not os.environ.get('DEBUG'):
                         continue
+                    if file == artifact_name or file == eval_info_name:
+                        continue
 
                     try:
                         shutil.copy(file_path, os.path.join(tmpdir, file))
@@ -270,6 +292,7 @@
                 # Log the entire directory to MLFlow
                 try:
                     eval_run.log_artifact(tmpdir, artifact_name)
+                    eval_run.log_artifact(tmpdir, eval_info_name)
                     self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
                 except Exception as e:
                     self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
@@ -286,9 +309,9 @@
                 artifact_file = Path(tmpdir) / artifact_name
                 with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
                     if data_only:
-                        f.write(json.dumps({"conversations": redteam_output.
-                    elif redteam_output.
-                        json.dump(redteam_output.
+                        f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+                    elif redteam_output.scan_result:
+                        json.dump(redteam_output.scan_result, f)
                 eval_run.log_artifact(tmpdir, artifact_name)
                 self.logger.debug(f"Logged artifact: {artifact_name}")
 
@@ -299,8 +322,8 @@
             "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
         })
 
-        if redteam_output.
-            scorecard = redteam_output.
+        if redteam_output.scan_result:
+            scorecard = redteam_output.scan_result["scorecard"]
             joint_attack_summary = scorecard["joint_risk_attack_summary"]
 
             if joint_attack_summary:
@@ -441,11 +464,11 @@
         self.logger.debug(f"API call: get_attack_objectives({risk_cat_value}, app: {application_scenario}, strategy: {strategy})")
         # strategy param specifies whether to get a strategy-specific dataset from the RAI service
         # right now, only tense requires strategy-specific dataset
-        if
+        if "tense" in strategy:
             objectives_response = await self.generated_rai_client.get_attack_objectives(
                 risk_category=risk_cat_value,
                 application_scenario=application_scenario or "",
-                strategy=
+                strategy="tense"
             )
         else:
             objectives_response = await self.generated_rai_client.get_attack_objectives(
@@ -675,11 +698,13 @@
                         # Set task status to TIMEOUT
                         batch_task_key = f"{strategy_name}_{risk_category}_batch_{batch_idx+1}"
                         self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                        self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
                         # Continue with partial results rather than failing completely
                         continue
                     except Exception as e:
                         log_error(self.logger, f"Error processing batch {batch_idx+1}", e, f"{strategy_name}/{risk_category}")
-
+                        self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1}: {str(e)}")
+                        self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
                         # Continue with other batches even if one fails
                         continue
             else:
@@ -699,16 +724,18 @@
                     # Set task status to TIMEOUT
                     single_batch_task_key = f"{strategy_name}_{risk_category}_single_batch"
                     self.task_statuses[single_batch_task_key] = TASK_STATUS["TIMEOUT"]
+                    self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
                 except Exception as e:
                     log_error(self.logger, "Error processing prompts", e, f"{strategy_name}/{risk_category}")
-
+                    self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}: {str(e)}")
+                    self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
 
             self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
             return orchestrator
 
         except Exception as e:
             log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{risk_category}")
-
+            self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category}: {str(e)}")
             self.task_statuses[task_key] = TASK_STATUS["FAILED"]
             raise
 
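Across these hunks, 1.5.0 adds per-cell status tracking: each strategy/risk-category entry of `self.red_team_info` now records whether its tasks completed, timed out, or failed. A sketch of the structure implied by the reads and writes in this diff (keys inferred from usage; the actual constants live in `red_team/_utils/constants.py`):

```python
# Inferred shape of self.red_team_info -- illustrative, not the package's code.
red_team_info = {
    "base64": {                                   # strategy name (example)
        "violence": {                             # risk category value (example)
            "status": "incomplete",               # TASK_STATUS["INCOMPLETE"] on timeout/error
            "evaluation_result_file": "eval.json",
            "evaluation_result": {},              # stripped before redteam_info.json is logged
        },
    },
}
```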
@@ -772,13 +799,13 @@
         from ._utils.formatting_utils import get_attack_success
         return get_attack_success(result)
 
-    def _to_red_team_result(self) ->
-        """Convert tracking data from red_team_info to the
+    def _to_red_team_result(self) -> RedTeamResult:
+        """Convert tracking data from red_team_info to the RedTeamResult format.
 
-        Uses only the red_team_info tracking dictionary to build the
+        Uses only the red_team_info tracking dictionary to build the RedTeamResult.
 
         :return: Structured red team agent results
-        :rtype:
+        :rtype: RedTeamResult
         """
         converters = []
         complexity_levels = []
@@ -791,7 +818,7 @@
             summary_file = os.path.join(self.scan_output_dir, "attack_summary.csv")
             self.logger.debug(f"Creating attack summary CSV file: {summary_file}")
 
-        self.logger.info(f"Building
+        self.logger.info(f"Building RedTeamResult from red_team_info with {len(self.red_team_info)} strategies")
 
         # Process each strategy and risk category from red_team_info
         for strategy_name, risk_data in self.red_team_info.items():
@@ -1134,20 +1161,20 @@
                 complexity_converters = complexity_df["converter"].unique().tolist()
                 redteaming_parameters["techniques_used"][complexity] = complexity_converters
 
-        self.logger.info("
+        self.logger.info("RedTeamResult creation completed")
 
         # Create the final result
-        red_team_result =
-
-
-
+        red_team_result = ScanResult(
+            scorecard=cast(RedTeamingScorecard, scorecard),
+            parameters=cast(RedTeamingParameters, redteaming_parameters),
+            attack_details=conversations,
             studio_url=self.ai_studio_url or None
         )
 
         return red_team_result
 
     # Replace with utility function
-    def _to_scorecard(self, redteam_result:
+    def _to_scorecard(self, redteam_result: RedTeamResult) -> str:
         from ._utils.formatting_utils import format_scorecard
         return format_scorecard(redteam_result)
 
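`ScanResult` is constructed with keyword arguments here and indexed like a dictionary later in `scan()`, which suggests a `TypedDict`. A hedged sketch of its apparent shape (field types inferred from usage in this diff, not from the source):

```python
from typing import Any, Dict, List, Optional, TypedDict

# Inferred shape only -- the real definition lives in red_team/_red_team_result.py.
class ScanResult(TypedDict):
    scorecard: Dict[str, Any]        # "RedTeamingScorecard" in the package
    parameters: Dict[str, Any]       # "RedTeamingParameters" in the package
    attack_details: Optional[List[Dict[str, Any]]]
    studio_url: Optional[str]
```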
@@ -1266,7 +1293,6 @@
                     output_path=result_path,
                 )
                 eval_logger.debug(f"Completed evaluation for {risk_category.value}/{strategy_name}")
-
             finally:
                 # Restore original stdout and stderr
                 sys.stdout = original_stdout
@@ -1299,6 +1325,7 @@
                     self.logger.warning(f"Failed to clean up logger: {str(e)}")
         self.red_team_info[self._get_strategy_name(strategy)][risk_category.value]["evaluation_result_file"] = str(result_path)
         self.red_team_info[self._get_strategy_name(strategy)][risk_category.value]["evaluation_result"] = evaluate_outputs
+        self.red_team_info[self._get_strategy_name(strategy)][risk_category.value]["status"] = TASK_STATUS["COMPLETED"]
         self.logger.debug(f"Evaluation complete for {strategy_name}/{risk_category.value}, results stored in red_team_info")
 
     async def _process_attack(
@@ -1344,7 +1371,7 @@
                 orchestrator = await call_orchestrator(self.chat_target, all_prompts, converter, strategy_name, risk_category.value, timeout)
             except PyritException as e:
                 log_error(self.logger, f"Error calling orchestrator for {strategy_name} strategy", e)
-
+                self.logger.debug(f"Orchestrator error for {strategy_name}/{risk_category.value}: {str(e)}")
                 self.task_statuses[task_key] = TASK_STATUS["FAILED"]
                 self.failed_tasks += 1
 
@@ -1370,6 +1397,7 @@
                 except Exception as e:
                     log_error(self.logger, f"Error during evaluation for {strategy_name}/{risk_category.value}", e)
                     print(f"⚠️ Evaluation error for {strategy_name}/{risk_category.value}: {str(e)}")
+                    self.red_team_info[strategy_name][risk_category.value]["status"] = TASK_STATUS["FAILED"]
                     # Continue processing even if evaluation fails
 
             async with progress_bar_lock:
@@ -1399,7 +1427,7 @@
 
         except Exception as e:
             log_error(self.logger, f"Unexpected error processing {strategy_name} strategy for {risk_category.value}", e)
-
+            self.logger.debug(f"Critical error in task {strategy_name}/{risk_category.value}: {str(e)}")
             self.task_statuses[task_key] = TASK_STATUS["FAILED"]
             self.failed_tasks += 1
 
@@ -1409,8 +1437,9 @@
         return None
 
     async def scan(
-        self,
+        self,
         target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget],
+        *,
         scan_name: Optional[str] = None,
         num_turns : int = 1,
         attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]] = [],
@@ -1419,8 +1448,8 @@
         application_scenario: Optional[str] = None,
         parallel_execution: bool = True,
         max_parallel_tasks: int = 5,
-
-
+        timeout: int = 120
+    ) -> RedTeamResult:
         """Run a red team scan against the target using the specified strategies.
 
         :param target: The target model or function to scan
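The `scan()` signature change makes everything after `target` keyword-only, drops `debug_mode` (see the docstring hunk below), and pins the return type to `RedTeamResult`. A hedged invocation sketch (`my_target` is a placeholder callable; `AttackStrategy.Base64` is assumed to be a valid member):

```python
import asyncio

# Placeholder target -- any callable the red team agent can attack.
def my_target(query: str) -> str:
    return "response"

async def main():
    result = await red_team.scan(
        my_target,                                  # only remaining positional argument
        scan_name="demo-scan",                      # keyword-only from 1.5.0 on
        attack_strategies=[AttackStrategy.Base64],
        timeout=120,
    )
    return result

asyncio.run(main())
```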
@@ -1441,8 +1470,6 @@
         :type parallel_execution: bool
         :param max_parallel_tasks: Maximum number of parallel orchestrator tasks to run (default: 5)
         :type max_parallel_tasks: int
-        :param debug_mode: Whether to run in debug mode (more verbose output)
-        :type debug_mode: bool
         :param timeout: The timeout in seconds for API calls (default: 120)
         :type timeout: int
         :return: The output from the red team scan
@@ -1522,7 +1549,7 @@
         if not self.attack_objective_generator:
             error_msg = "Attack objective generator is required for red team agent."
             log_error(self.logger, error_msg)
-
+            self.logger.debug(f"{error_msg}")
             raise EvaluationException(
                 message=error_msg,
                 internal_message="Attack objective generator is not provided.",
@@ -1676,7 +1703,6 @@
             for risk_category in self.risk_categories:
                 progress_bar.set_postfix({"current": f"fetching {strategy_name}/{risk_category.value}"})
                 self.logger.debug(f"Fetching objectives for {strategy_name} strategy and {risk_category.value} risk category")
-
                 objectives = await self._get_attack_objectives(
                     risk_category=risk_category,
                     application_scenario=application_scenario,
@@ -1684,9 +1710,6 @@
                 )
                 all_objectives[strategy_name][risk_category.value] = objectives
 
-                # Print status about objective count for this strategy/risk
-                if debug_mode:
-                    print(f" - {risk_category.value}: {len(objectives)} objectives")
 
         self.logger.info("Completed fetching all attack objectives")
 
@@ -1754,7 +1777,7 @@
                         continue
                     except Exception as e:
                         log_error(self.logger, f"Error processing batch {i//max_parallel_tasks+1}", e)
-
+                        self.logger.debug(f"Error in batch {i//max_parallel_tasks+1}: {str(e)}")
                         continue
             else:
                 # Sequential execution
@@ -1776,7 +1799,7 @@
                         continue
                     except Exception as e:
                         log_error(self.logger, f"Error processing task {i+1}/{len(orchestrator_tasks)}", e)
-
+                        self.logger.debug(f"Error in task {i+1}: {str(e)}")
                         continue
 
             progress_bar.close()
@@ -1793,17 +1816,23 @@
         # Process results
         log_section_header(self.logger, "Processing results")
 
-        # Convert results to
+        # Convert results to RedTeamResult using only red_team_info
         red_team_result = self._to_red_team_result()
+        scan_result = ScanResult(
+            scorecard=red_team_result["scorecard"],
+            parameters=red_team_result["parameters"],
+            attack_details=red_team_result["attack_details"],
+            studio_url=red_team_result["studio_url"],
+        )
 
         # Create output with either full results or just conversations
         if data_only:
             self.logger.info("Data-only mode, creating output with just conversations")
-            output =
+            output = RedTeamResult(scan_result=scan_result, attack_details=red_team_result["attack_details"])
         else:
-            output =
-
-
+            output = RedTeamResult(
+                scan_result=red_team_result,
+                attack_details=red_team_result["attack_details"]
             )
 
         # Log results to MLFlow
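With the new wiring, `RedTeamResult` wraps a `ScanResult` plus the raw attack conversations. A consumption sketch for the returned object, using only fields that appear in this diff:

```python
# result is the RedTeamResult returned by scan() in the earlier sketch.
if result.scan_result:
    print(result.scan_result["scorecard"])            # aggregate scorecard
    print(result.scan_result.get("studio_url", ""))   # deep link, may be empty
for conversation in result.attack_details or []:      # raw attack conversations
    print(conversation)
```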
@@ -1818,26 +1847,26 @@
             self.logger.info("Data-only mode, returning results without evaluation")
             return output
 
-        if output_path and output.
+        if output_path and output.scan_result:
             # Ensure output_path is an absolute path
             abs_output_path = output_path if os.path.isabs(output_path) else os.path.abspath(output_path)
             self.logger.info(f"Writing output to {abs_output_path}")
-            _write_output(abs_output_path, output.
+            _write_output(abs_output_path, output.scan_result)
 
             # Also save a copy to the scan output directory if available
             if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
                 final_output = os.path.join(self.scan_output_dir, "final_results.json")
-                _write_output(final_output, output.
+                _write_output(final_output, output.scan_result)
                 self.logger.info(f"Also saved a copy to {final_output}")
-        elif output.
+        elif output.scan_result and hasattr(self, 'scan_output_dir') and self.scan_output_dir:
             # If no output_path was specified but we have scan_output_dir, save there
             final_output = os.path.join(self.scan_output_dir, "final_results.json")
-            _write_output(final_output, output.
+            _write_output(final_output, output.scan_result)
             self.logger.info(f"Saved results to {final_output}")
 
-        if output.
+        if output.scan_result:
             self.logger.debug("Generating scorecard")
-            scorecard = self._to_scorecard(output.
+            scorecard = self._to_scorecard(output.scan_result)
             # Store scorecard in a variable for accessing later if needed
             self.scorecard = scorecard
 
@@ -1845,7 +1874,7 @@
             print(scorecard)
 
             # Print URL for detailed results (once only)
-            studio_url = output.
+            studio_url = output.scan_result.get("studio_url", "")
             if studio_url:
                 print(f"\nDetailed results available at:\n{studio_url}")
 