azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic.

Files changed (53)
  1. azure/ai/evaluation/__init__.py +0 -16
  2. azure/ai/evaluation/_common/rai_service.py +1 -1
  3. azure/ai/evaluation/_common/utils.py +1 -1
  4. azure/ai/evaluation/_converters/__init__.py +1 -1
  5. azure/ai/evaluation/_converters/_ai_services.py +4 -4
  6. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  7. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
  8. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
  9. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
  10. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  11. azure/ai/evaluation/_evaluate/_evaluate.py +84 -68
  12. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
  13. azure/ai/evaluation/_evaluate/_utils.py +3 -3
  14. azure/ai/evaluation/_evaluators/_common/_base_eval.py +1 -1
  15. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
  16. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +1 -1
  17. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
  18. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +1 -0
  19. azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
  20. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  21. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  22. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  23. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  24. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  25. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  26. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  27. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  28. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  29. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  30. azure/ai/evaluation/_legacy/_batch_engine/_result.py +1 -1
  31. azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
  32. azure/ai/evaluation/_version.py +1 -1
  33. azure/ai/evaluation/red_team/__init__.py +19 -0
  34. azure/ai/evaluation/{_red_team → red_team}/_attack_objective_generator.py +3 -0
  35. azure/ai/evaluation/{_red_team → red_team}/_attack_strategy.py +3 -0
  36. azure/ai/evaluation/{_red_team → red_team}/_red_team.py +96 -67
  37. azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  38. azure/ai/evaluation/{_red_team → red_team}/_utils/constants.py +2 -1
  39. azure/ai/evaluation/{_red_team → red_team}/_utils/formatting_utils.py +23 -22
  40. azure/ai/evaluation/{_red_team → red_team}/_utils/logging_utils.py +1 -1
  41. azure/ai/evaluation/{_red_team → red_team}/_utils/strategy_utils.py +8 -4
  42. azure/ai/evaluation/simulator/_simulator.py +1 -1
  43. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +13 -2
  44. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/RECORD +50 -40
  45. azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
  46. azure/ai/evaluation/_red_team/_utils/__init__.py +0 -3
  47. azure/ai/evaluation/simulator/_tracing.py +0 -89
  48. /azure/ai/evaluation/{_red_team → red_team}/_callback_chat_target.py +0 -0
  49. /azure/ai/evaluation/{_red_team → red_team}/_default_converter.py +0 -0
  50. /azure/ai/evaluation/{_red_team → red_team/_utils}/__init__.py +0 -0
  51. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
  52. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +0 -0
  53. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
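
The headline change in this release is the promotion of the private _red_team package to a public red_team package, together with a rename of its result types (the first hunk below shows RedTeamOutput/_RedTeamResult giving way to RedTeamResult/ScanResult). A minimal, hedged import sketch of the migration; it assumes the new red_team/__init__.py re-exports these names, which this diff lists but does not show:

    # Sketch of the 1.4.0 -> 1.5.0 import migration (assumed re-exports; verify against the release).
    # 1.4.0 (private package, old result types):
    #   from azure.ai.evaluation._red_team._red_team import RedTeam
    #   from azure.ai.evaluation._red_team._red_team_result import RedTeamOutput
    # 1.5.0 (public package, renamed result types):
    from azure.ai.evaluation.red_team import RedTeam, RedTeamResult, AttackStrategy, RiskCategory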
@@ -43,7 +43,7 @@ from azure.ai.evaluation import evaluate
  from azure.core.credentials import TokenCredential
 
  # Red Teaming imports
- from ._red_team_result import _RedTeamResult, _RedTeamingScorecard, _RedTeamingParameters, RedTeamOutput
+ from ._red_team_result import RedTeamResult, RedTeamingScorecard, RedTeamingParameters, ScanResult
  from ._attack_strategy import AttackStrategy
  from ._attack_objective_generator import RiskCategory, _AttackObjectiveGenerator
 
@@ -90,14 +90,17 @@ class RedTeam():
  :param max_parallel_tasks: Maximum number of parallel tasks to run when scanning (default: 5)
  :type max_parallel_tasks: int
  """
- def __init__(self,
- azure_ai_project,
- credential,
- risk_categories: Optional[List[RiskCategory]] = None,
- num_objectives: int = 10,
- application_scenario: Optional[str] = None,
- custom_attack_seed_prompts: Optional[str] = None,
- output_dir=None):
+ def __init__(
+ self,
+ azure_ai_project,
+ credential,
+ *,
+ risk_categories: Optional[List[RiskCategory]] = None,
+ num_objectives: int = 10,
+ application_scenario: Optional[str] = None,
+ custom_attack_seed_prompts: Optional[str] = None,
+ output_dir=None
+ ):
 
  self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
  self.credential = credential
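
With this change everything after credential becomes keyword-only. A hedged construction sketch consistent with the new signature; the project details and risk-category choice are placeholders for illustration, and the import path assumes the re-exports noted above:

    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation.red_team import RedTeam, RiskCategory  # assumed re-exports

    azure_ai_project = {  # placeholder project details
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    }

    red_team = RedTeam(
        azure_ai_project,
        DefaultAzureCredential(),
        # Everything from here on must now be passed by keyword:
        risk_categories=[RiskCategory.Violence],  # illustrative member name
        num_objectives=10,
        application_scenario="customer support assistant",
    )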
@@ -201,7 +204,7 @@ class RedTeam():
 
  async def _log_redteam_results_to_mlflow(
  self,
- redteam_output: RedTeamOutput,
+ redteam_output: RedTeamResult,
  eval_run: EvalRun,
  data_only: bool = False,
  ) -> Optional[str]:
@@ -227,15 +230,28 @@ class RedTeam():
  with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
  if data_only:
  # In data_only mode, we write the conversations in conversation/messages format
- f.write(json.dumps({"conversations": redteam_output.redteaming_data or []}))
- elif redteam_output.red_team_result:
- json.dump(redteam_output.red_team_result, f)
+ f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+ elif redteam_output.scan_result:
+ json.dump(redteam_output.scan_result, f)
+
+ eval_info_name = "redteam_info.json"
+ eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
+ self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
+ with open (eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+ # Remove evaluation_result from red_team_info before logging
+ red_team_info_logged = {}
+ for strategy, harms_dict in self.red_team_info.items():
+ red_team_info_logged[strategy] = {}
+ for harm, info_dict in harms_dict.items():
+ info_dict.pop("evaluation_result", None)
+ red_team_info_logged[strategy][harm] = info_dict
+ f.write(json.dumps(red_team_info_logged))
 
  # Also save a human-readable scorecard if available
- if not data_only and redteam_output.red_team_result:
+ if not data_only and redteam_output.scan_result:
  scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
  with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
- f.write(self._to_scorecard(redteam_output.red_team_result))
+ f.write(self._to_scorecard(redteam_output.scan_result))
  self.logger.debug(f"Saved scorecard to: {scorecard_path}")
 
  # Create a dedicated artifacts directory with proper structure for MLFlow
@@ -246,9 +262,13 @@ class RedTeam():
  # First, create the main artifact file that MLFlow expects
  with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
  if data_only:
- f.write(json.dumps({"conversations": redteam_output.redteaming_data or []}))
- elif redteam_output.red_team_result:
- json.dump(redteam_output.red_team_result, f)
+ f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+ elif redteam_output.scan_result:
+ redteam_output.scan_result["redteaming_scorecard"] = redteam_output.scan_result.get("scorecard", None)
+ redteam_output.scan_result["redteaming_parameters"] = redteam_output.scan_result.get("parameters", None)
+ redteam_output.scan_result["redteaming_data"] = redteam_output.scan_result.get("attack_details", None)
+
+ json.dump(redteam_output.scan_result, f)
 
  # Copy all relevant files to the temp directory
  import shutil
@@ -260,6 +280,8 @@ class RedTeam():
  continue
  if file.endswith('.log') and not os.environ.get('DEBUG'):
  continue
+ if file == artifact_name or file == eval_info_name:
+ continue
 
  try:
  shutil.copy(file_path, os.path.join(tmpdir, file))
@@ -270,6 +292,7 @@ class RedTeam():
  # Log the entire directory to MLFlow
  try:
  eval_run.log_artifact(tmpdir, artifact_name)
+ eval_run.log_artifact(tmpdir, eval_info_name)
  self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
  except Exception as e:
  self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
@@ -286,9 +309,9 @@ class RedTeam():
  artifact_file = Path(tmpdir) / artifact_name
  with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
  if data_only:
- f.write(json.dumps({"conversations": redteam_output.redteaming_data or []}))
- elif redteam_output.red_team_result:
- json.dump(redteam_output.red_team_result, f)
+ f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+ elif redteam_output.scan_result:
+ json.dump(redteam_output.scan_result, f)
  eval_run.log_artifact(tmpdir, artifact_name)
  self.logger.debug(f"Logged artifact: {artifact_name}")
 
@@ -299,8 +322,8 @@ class RedTeam():
  "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
  })
 
- if redteam_output.red_team_result:
- scorecard = redteam_output.red_team_result["redteaming_scorecard"]
+ if redteam_output.scan_result:
+ scorecard = redteam_output.scan_result["scorecard"]
  joint_attack_summary = scorecard["joint_risk_attack_summary"]
 
  if joint_attack_summary:
@@ -441,11 +464,11 @@ class RedTeam():
  self.logger.debug(f"API call: get_attack_objectives({risk_cat_value}, app: {application_scenario}, strategy: {strategy})")
  # strategy param specifies whether to get a strategy-specific dataset from the RAI service
  # right now, only tense requires strategy-specific dataset
- if strategy == "tense":
+ if "tense" in strategy:
  objectives_response = await self.generated_rai_client.get_attack_objectives(
  risk_category=risk_cat_value,
  application_scenario=application_scenario or "",
- strategy=strategy
+ strategy="tense"
  )
  else:
  objectives_response = await self.generated_rai_client.get_attack_objectives(
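
The objective-fetching logic now checks for the substring "tense" rather than an exact match, so composed strategy labels that include the tense converter still receive the tense-specific dataset, while the RAI service is always queried with the plain "tense" strategy. A small illustration of the changed condition (the composed label is hypothetical):

    strategy = "tense_base64"      # hypothetical composed strategy label
    print(strategy == "tense")     # False -- the old exact-match check would skip the tense dataset
    print("tense" in strategy)     # True  -- the new membership check routes it correctly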
@@ -675,11 +698,13 @@ class RedTeam():
  # Set task status to TIMEOUT
  batch_task_key = f"{strategy_name}_{risk_category}_batch_{batch_idx+1}"
  self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
  # Continue with partial results rather than failing completely
  continue
  except Exception as e:
  log_error(self.logger, f"Error processing batch {batch_idx+1}", e, f"{strategy_name}/{risk_category}")
- print(f"ERROR: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1}: {str(e)}")
+ self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1}: {str(e)}")
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
  # Continue with other batches even if one fails
  continue
  else:
@@ -699,16 +724,18 @@ class RedTeam():
  # Set task status to TIMEOUT
  single_batch_task_key = f"{strategy_name}_{risk_category}_single_batch"
  self.task_statuses[single_batch_task_key] = TASK_STATUS["TIMEOUT"]
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
  except Exception as e:
  log_error(self.logger, "Error processing prompts", e, f"{strategy_name}/{risk_category}")
- print(f"ERROR: Strategy {strategy_name}, Risk {risk_category}: {str(e)}")
+ self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}: {str(e)}")
+ self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
 
  self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
  return orchestrator
 
  except Exception as e:
  log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{risk_category}")
- print(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category}: {str(e)}")
+ self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category}: {str(e)}")
  self.task_statuses[task_key] = TASK_STATUS["FAILED"]
  raise
 
@@ -772,13 +799,13 @@ class RedTeam():
  from ._utils.formatting_utils import get_attack_success
  return get_attack_success(result)
 
- def _to_red_team_result(self) -> _RedTeamResult:
- """Convert tracking data from red_team_info to the _RedTeamResult format.
+ def _to_red_team_result(self) -> RedTeamResult:
+ """Convert tracking data from red_team_info to the RedTeamResult format.
 
- Uses only the red_team_info tracking dictionary to build the _RedTeamResult.
+ Uses only the red_team_info tracking dictionary to build the RedTeamResult.
 
  :return: Structured red team agent results
- :rtype: _RedTeamResult
+ :rtype: RedTeamResult
  """
  converters = []
  complexity_levels = []
@@ -791,7 +818,7 @@ class RedTeam():
  summary_file = os.path.join(self.scan_output_dir, "attack_summary.csv")
  self.logger.debug(f"Creating attack summary CSV file: {summary_file}")
 
- self.logger.info(f"Building _RedTeamResult from red_team_info with {len(self.red_team_info)} strategies")
+ self.logger.info(f"Building RedTeamResult from red_team_info with {len(self.red_team_info)} strategies")
 
  # Process each strategy and risk category from red_team_info
  for strategy_name, risk_data in self.red_team_info.items():
@@ -1134,20 +1161,20 @@ class RedTeam():
  complexity_converters = complexity_df["converter"].unique().tolist()
  redteaming_parameters["techniques_used"][complexity] = complexity_converters
 
- self.logger.info("_RedTeamResult creation completed")
+ self.logger.info("RedTeamResult creation completed")
 
  # Create the final result
- red_team_result = _RedTeamResult(
- redteaming_scorecard=cast(_RedTeamingScorecard, scorecard),
- redteaming_parameters=cast(_RedTeamingParameters, redteaming_parameters),
- redteaming_data=conversations,
+ red_team_result = ScanResult(
+ scorecard=cast(RedTeamingScorecard, scorecard),
+ parameters=cast(RedTeamingParameters, redteaming_parameters),
+ attack_details=conversations,
  studio_url=self.ai_studio_url or None
  )
 
  return red_team_result
 
  # Replace with utility function
- def _to_scorecard(self, redteam_result: _RedTeamResult) -> str:
+ def _to_scorecard(self, redteam_result: RedTeamResult) -> str:
  from ._utils.formatting_utils import format_scorecard
  return format_scorecard(redteam_result)
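
The result builder now returns a ScanResult keyed by scorecard, parameters, and attack_details instead of the old redteaming_* names (the compatibility aliases written back in the MLFlow logging hunk above cover older consumers). A hedged helper showing how the renamed keys are read, treating ScanResult as the dict-like structure the surrounding code indexes into; the helper itself is hypothetical:

    from typing import Any, Dict, Optional

    def summarize_scan(scan_result: Dict[str, Any]) -> Optional[str]:
        """Hypothetical helper: read the renamed ScanResult keys."""
        scorecard = scan_result["scorecard"]            # was "redteaming_scorecard" in 1.4.0
        parameters = scan_result["parameters"]          # was "redteaming_parameters"
        attack_details = scan_result["attack_details"]  # was "redteaming_data"
        print(f"{len(attack_details or [])} attack conversations recorded")
        print(f"Techniques used: {parameters.get('techniques_used', {})}")
        return scan_result.get("studio_url")            # link to detailed results, if present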
@@ -1266,7 +1293,6 @@ class RedTeam():
  output_path=result_path,
  )
  eval_logger.debug(f"Completed evaluation for {risk_category.value}/{strategy_name}")
-
  finally:
  # Restore original stdout and stderr
  sys.stdout = original_stdout
@@ -1299,6 +1325,7 @@ class RedTeam():
  self.logger.warning(f"Failed to clean up logger: {str(e)}")
  self.red_team_info[self._get_strategy_name(strategy)][risk_category.value]["evaluation_result_file"] = str(result_path)
  self.red_team_info[self._get_strategy_name(strategy)][risk_category.value]["evaluation_result"] = evaluate_outputs
+ self.red_team_info[self._get_strategy_name(strategy)][risk_category.value]["status"] = TASK_STATUS["COMPLETED"]
  self.logger.debug(f"Evaluation complete for {strategy_name}/{risk_category.value}, results stored in red_team_info")
 
  async def _process_attack(
@@ -1344,7 +1371,7 @@ class RedTeam():
  orchestrator = await call_orchestrator(self.chat_target, all_prompts, converter, strategy_name, risk_category.value, timeout)
  except PyritException as e:
  log_error(self.logger, f"Error calling orchestrator for {strategy_name} strategy", e)
- print(f"Orchestrator error for {strategy_name}/{risk_category.value}: {str(e)}")
+ self.logger.debug(f"Orchestrator error for {strategy_name}/{risk_category.value}: {str(e)}")
  self.task_statuses[task_key] = TASK_STATUS["FAILED"]
  self.failed_tasks += 1
 
@@ -1370,6 +1397,7 @@ class RedTeam():
  except Exception as e:
  log_error(self.logger, f"Error during evaluation for {strategy_name}/{risk_category.value}", e)
  print(f"⚠️ Evaluation error for {strategy_name}/{risk_category.value}: {str(e)}")
+ self.red_team_info[strategy_name][risk_category.value]["status"] = TASK_STATUS["FAILED"]
  # Continue processing even if evaluation fails
 
  async with progress_bar_lock:
@@ -1399,7 +1427,7 @@ class RedTeam():
 
  except Exception as e:
  log_error(self.logger, f"Unexpected error processing {strategy_name} strategy for {risk_category.value}", e)
- print(f"Critical error in task {strategy_name}/{risk_category.value}: {str(e)}")
+ self.logger.debug(f"Critical error in task {strategy_name}/{risk_category.value}: {str(e)}")
  self.task_statuses[task_key] = TASK_STATUS["FAILED"]
  self.failed_tasks += 1
 
@@ -1409,8 +1437,9 @@ class RedTeam():
  return None
 
  async def scan(
- self,
+ self,
  target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget],
+ *,
  scan_name: Optional[str] = None,
  num_turns : int = 1,
  attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]] = [],
@@ -1419,8 +1448,8 @@ class RedTeam():
  application_scenario: Optional[str] = None,
  parallel_execution: bool = True,
  max_parallel_tasks: int = 5,
- debug_mode: bool = False,
- timeout: int = 120) -> RedTeamOutput:
+ timeout: int = 120
+ ) -> RedTeamResult:
  """Run a red team scan against the target using the specified strategies.
 
  :param target: The target model or function to scan
@@ -1441,8 +1470,6 @@ class RedTeam():
  :type parallel_execution: bool
  :param max_parallel_tasks: Maximum number of parallel orchestrator tasks to run (default: 5)
  :type max_parallel_tasks: int
- :param debug_mode: Whether to run in debug mode (more verbose output)
- :type debug_mode: bool
  :param timeout: The timeout in seconds for API calls (default: 120)
  :type timeout: int
  :return: The output from the red team scan
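
Taken together, these two hunks make scan() keyword-only after target, drop the debug_mode flag (its verbose prints are now logger.debug calls), and change the return type to RedTeamResult. A hedged call sketch under the new signature; the target function and strategy choice are placeholders, and the import path assumes the re-exports noted above:

    import asyncio
    from azure.ai.evaluation.red_team import AttackStrategy  # assumed re-export

    def my_target(query: str) -> str:
        # Placeholder target: echo the prompt back.
        return f"echo: {query}"

    async def run_scan(red_team) -> None:
        result = await red_team.scan(
            my_target,                                  # target stays positional
            scan_name="sample-scan",                    # everything else is keyword-only now
            attack_strategies=[AttackStrategy.Base64],  # illustrative member name
            num_turns=1,
            timeout=120,
            # debug_mode=True  <- removed in 1.5.0
        )
        print(result.scan_result and result.scan_result.get("studio_url"))

    # asyncio.run(run_scan(red_team))  # requires a configured RedTeam instance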
@@ -1522,7 +1549,7 @@ class RedTeam():
  if not self.attack_objective_generator:
  error_msg = "Attack objective generator is required for red team agent."
  log_error(self.logger, error_msg)
- print(f"{error_msg}")
+ self.logger.debug(f"{error_msg}")
  raise EvaluationException(
  message=error_msg,
  internal_message="Attack objective generator is not provided.",
@@ -1676,7 +1703,6 @@ class RedTeam():
  for risk_category in self.risk_categories:
  progress_bar.set_postfix({"current": f"fetching {strategy_name}/{risk_category.value}"})
  self.logger.debug(f"Fetching objectives for {strategy_name} strategy and {risk_category.value} risk category")
-
  objectives = await self._get_attack_objectives(
  risk_category=risk_category,
  application_scenario=application_scenario,
@@ -1684,9 +1710,6 @@ class RedTeam():
  )
  all_objectives[strategy_name][risk_category.value] = objectives
 
- # Print status about objective count for this strategy/risk
- if debug_mode:
- print(f" - {risk_category.value}: {len(objectives)} objectives")
 
  self.logger.info("Completed fetching all attack objectives")
 
@@ -1754,7 +1777,7 @@ class RedTeam():
  continue
  except Exception as e:
  log_error(self.logger, f"Error processing batch {i//max_parallel_tasks+1}", e)
- print(f"Error in batch {i//max_parallel_tasks+1}: {str(e)}")
+ self.logger.debug(f"Error in batch {i//max_parallel_tasks+1}: {str(e)}")
  continue
  else:
  # Sequential execution
@@ -1776,7 +1799,7 @@ class RedTeam():
  continue
  except Exception as e:
  log_error(self.logger, f"Error processing task {i+1}/{len(orchestrator_tasks)}", e)
- print(f"Error in task {i+1}: {str(e)}")
+ self.logger.debug(f"Error in task {i+1}: {str(e)}")
  continue
 
  progress_bar.close()
@@ -1793,17 +1816,23 @@ class RedTeam():
  # Process results
  log_section_header(self.logger, "Processing results")
 
- # Convert results to _RedTeamResult using only red_team_info
+ # Convert results to RedTeamResult using only red_team_info
  red_team_result = self._to_red_team_result()
+ scan_result = ScanResult(
+ scorecard=red_team_result["scorecard"],
+ parameters=red_team_result["parameters"],
+ attack_details=red_team_result["attack_details"],
+ studio_url=red_team_result["studio_url"],
+ )
 
  # Create output with either full results or just conversations
  if data_only:
  self.logger.info("Data-only mode, creating output with just conversations")
- output = RedTeamOutput(redteaming_data=red_team_result["redteaming_data"])
+ output = RedTeamResult(scan_result=scan_result, attack_details=red_team_result["attack_details"])
  else:
- output = RedTeamOutput(
- red_team_result=red_team_result,
- redteaming_data=red_team_result["redteaming_data"]
+ output = RedTeamResult(
+ scan_result=red_team_result,
+ attack_details=red_team_result["attack_details"]
  )
 
  # Log results to MLFlow
@@ -1818,26 +1847,26 @@ class RedTeam():
  self.logger.info("Data-only mode, returning results without evaluation")
  return output
 
- if output_path and output.red_team_result:
+ if output_path and output.scan_result:
  # Ensure output_path is an absolute path
  abs_output_path = output_path if os.path.isabs(output_path) else os.path.abspath(output_path)
  self.logger.info(f"Writing output to {abs_output_path}")
- _write_output(abs_output_path, output.red_team_result)
+ _write_output(abs_output_path, output.scan_result)
 
  # Also save a copy to the scan output directory if available
  if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
  final_output = os.path.join(self.scan_output_dir, "final_results.json")
- _write_output(final_output, output.red_team_result)
+ _write_output(final_output, output.scan_result)
  self.logger.info(f"Also saved a copy to {final_output}")
- elif output.red_team_result and hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+ elif output.scan_result and hasattr(self, 'scan_output_dir') and self.scan_output_dir:
  # If no output_path was specified but we have scan_output_dir, save there
  final_output = os.path.join(self.scan_output_dir, "final_results.json")
- _write_output(final_output, output.red_team_result)
+ _write_output(final_output, output.scan_result)
  self.logger.info(f"Saved results to {final_output}")
 
- if output.red_team_result:
+ if output.scan_result:
  self.logger.debug("Generating scorecard")
- scorecard = self._to_scorecard(output.red_team_result)
+ scorecard = self._to_scorecard(output.scan_result)
  # Store scorecard in a variable for accessing later if needed
  self.scorecard = scorecard
 
@@ -1845,7 +1874,7 @@ class RedTeam():
  print(scorecard)
 
  # Print URL for detailed results (once only)
- studio_url = output.red_team_result.get("studio_url", "")
+ studio_url = output.scan_result.get("studio_url", "")
  if studio_url:
  print(f"\nDetailed results available at:\n{studio_url}")
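
In short, callers now receive a RedTeamResult whose scan_result attribute carries the dict-like ScanResult and whose attack_details attribute carries the raw conversations. A hedged sketch of inspecting that return value, mirroring what scan() itself prints at the end of a run (the helper is hypothetical):

    def report(output) -> None:
        """Hypothetical helper: summarize a RedTeamResult returned by scan()."""
        if output.scan_result:
            print("Scorecard sections:", list(output.scan_result["scorecard"].keys()))
            studio_url = output.scan_result.get("studio_url", "")
            if studio_url:
                print(f"Detailed results available at:\n{studio_url}")
        if output.attack_details:
            print(f"{len(output.attack_details)} attack conversations captured")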