azure-ai-evaluation 1.11.2__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. azure/ai/evaluation/__init__.py +2 -0
  2. azure/ai/evaluation/_aoai/aoai_grader.py +69 -28
  3. azure/ai/evaluation/_aoai/label_grader.py +14 -13
  4. azure/ai/evaluation/_aoai/python_grader.py +15 -13
  5. azure/ai/evaluation/_aoai/score_model_grader.py +13 -10
  6. azure/ai/evaluation/_aoai/string_check_grader.py +13 -13
  7. azure/ai/evaluation/_aoai/text_similarity_grader.py +16 -25
  8. azure/ai/evaluation/_common/__init__.py +2 -1
  9. azure/ai/evaluation/_common/constants.py +109 -0
  10. azure/ai/evaluation/_common/evaluation_onedp_client.py +5 -5
  11. azure/ai/evaluation/_common/onedp/__init__.py +2 -2
  12. azure/ai/evaluation/_common/onedp/_client.py +44 -14
  13. azure/ai/evaluation/_common/onedp/_configuration.py +9 -7
  14. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1 -1
  15. azure/ai/evaluation/_common/onedp/_validation.py +18 -2
  16. azure/ai/evaluation/_common/onedp/aio/__init__.py +2 -2
  17. azure/ai/evaluation/_common/onedp/aio/_client.py +44 -14
  18. azure/ai/evaluation/_common/onedp/aio/_configuration.py +9 -7
  19. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +12 -0
  20. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +3942 -1631
  21. azure/ai/evaluation/_common/onedp/models/__init__.py +196 -6
  22. azure/ai/evaluation/_common/onedp/models/_enums.py +217 -0
  23. azure/ai/evaluation/_common/onedp/models/_models.py +3876 -603
  24. azure/ai/evaluation/_common/onedp/operations/__init__.py +12 -0
  25. azure/ai/evaluation/_common/onedp/operations/_operations.py +5422 -2577
  26. azure/ai/evaluation/_common/rai_service.py +299 -2
  27. azure/ai/evaluation/_common/utils.py +173 -39
  28. azure/ai/evaluation/_constants.py +100 -0
  29. azure/ai/evaluation/_eval_mapping.py +10 -0
  30. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +10 -0
  31. azure/ai/evaluation/_evaluate/_evaluate.py +1125 -9
  32. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +330 -51
  33. azure/ai/evaluation/_evaluate/_utils.py +17 -6
  34. azure/ai/evaluation/_evaluator_definition.py +76 -0
  35. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -0
  36. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -17
  37. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +7 -1
  38. azure/ai/evaluation/_evaluators/_common/_base_eval.py +80 -4
  39. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +181 -3
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +7 -1
  41. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +28 -13
  42. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  43. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +23 -4
  44. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -7
  45. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +36 -19
  46. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +43 -20
  47. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +7 -1
  48. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +7 -1
  49. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +32 -6
  50. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  51. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  52. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  53. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  54. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  55. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +23 -127
  56. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  57. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  58. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  59. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  60. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  61. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  62. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  64. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  65. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  66. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  67. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  68. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -19
  69. azure/ai/evaluation/_exceptions.py +6 -0
  70. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +14 -1
  71. azure/ai/evaluation/_legacy/prompty/_prompty.py +2 -1
  72. azure/ai/evaluation/_legacy/prompty/_utils.py +54 -6
  73. azure/ai/evaluation/_model_configurations.py +26 -0
  74. azure/ai/evaluation/_version.py +1 -1
  75. azure/ai/evaluation/red_team/_attack_objective_generator.py +3 -1
  76. azure/ai/evaluation/red_team/_attack_strategy.py +1 -0
  77. azure/ai/evaluation/red_team/_callback_chat_target.py +45 -14
  78. azure/ai/evaluation/red_team/_evaluation_processor.py +129 -12
  79. azure/ai/evaluation/red_team/_mlflow_integration.py +144 -36
  80. azure/ai/evaluation/red_team/_orchestrator_manager.py +309 -51
  81. azure/ai/evaluation/red_team/_red_team.py +503 -37
  82. azure/ai/evaluation/red_team/_red_team_result.py +264 -15
  83. azure/ai/evaluation/red_team/_result_processor.py +953 -31
  84. azure/ai/evaluation/red_team/_utils/constants.py +1 -0
  85. azure/ai/evaluation/red_team/_utils/formatting_utils.py +126 -25
  86. azure/ai/evaluation/red_team/_utils/metric_mapping.py +10 -7
  87. azure/ai/evaluation/red_team/_utils/strategy_utils.py +3 -25
  88. azure/ai/evaluation/simulator/_adversarial_simulator.py +1 -1
  89. azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  90. azure/ai/evaluation/simulator/_conversation/_conversation.py +1 -1
  91. azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
  92. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -1
  93. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +25 -2
  94. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -0
  95. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +1 -1
  96. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  97. azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
  98. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/METADATA +44 -10
  99. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/RECORD +102 -84
  100. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/WHEEL +0 -0
  101. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/licenses/NOTICE.txt +0 -0
  102. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ import itertools
7
7
  import logging
8
8
  import math
9
9
  import os
10
+ from pathlib import Path
10
11
  import random
11
12
  import time
12
13
  import uuid
@@ -17,6 +18,10 @@ from tqdm import tqdm
17
18
  # Azure AI Evaluation imports
18
19
  from azure.ai.evaluation._constants import TokenScope
19
20
  from azure.ai.evaluation._common._experimental import experimental
21
+
22
+ from azure.ai.evaluation._evaluate._evaluate import (
23
+ emit_eval_result_events_to_app_insights,
24
+ ) # TODO: uncomment when app insights checked in
20
25
  from azure.ai.evaluation._model_configurations import EvaluationResult
21
26
  from azure.ai.evaluation.simulator._model_tools import ManagedIdentityAPITokenManager
22
27
  from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
@@ -65,6 +70,7 @@ from ._utils.formatting_utils import (
65
70
  get_flattened_attack_strategies,
66
71
  write_pyrit_outputs_to_file,
67
72
  format_scorecard,
73
+ format_content_by_modality,
68
74
  )
69
75
  from ._utils.strategy_utils import get_chat_target, get_converter_for_strategy
70
76
  from ._utils.retry_utils import create_standard_retry_manager
@@ -208,6 +214,9 @@ class RedTeam:
208
214
  # keep track of prompt content to context mapping for evaluation
209
215
  self.prompt_to_context = {}
210
216
 
217
+ # keep track of prompt content to risk_sub_type mapping for evaluation
218
+ self.prompt_to_risk_subtype = {}
219
+
211
220
  # Initialize PyRIT
212
221
  initialize_pyrit(memory_db_type=DUCK_DB)
213
222
 
@@ -276,6 +285,7 @@ class RedTeam:
276
285
  one_dp_project=self._one_dp_project,
277
286
  retry_config=retry_config,
278
287
  scan_output_dir=self.scan_output_dir,
288
+ red_team=self,
279
289
  )
280
290
 
281
291
  # Initialize evaluation processor
@@ -287,6 +297,7 @@ class RedTeam:
287
297
  retry_config=retry_config,
288
298
  scan_session_id=self.scan_session_id,
289
299
  scan_output_dir=self.scan_output_dir,
300
+ taxonomy_risk_categories=getattr(self, "taxonomy_risk_categories", None),
290
301
  )
291
302
 
292
303
  # Initialize MLflow integration
@@ -305,6 +316,7 @@ class RedTeam:
305
316
  application_scenario=getattr(self, "application_scenario", ""),
306
317
  risk_categories=getattr(self, "risk_categories", []),
307
318
  ai_studio_url=getattr(self.mlflow_integration, "ai_studio_url", None),
319
+ mlflow_integration=self.mlflow_integration,
308
320
  )
309
321
 
310
322
  async def _get_attack_objectives(
@@ -312,6 +324,8 @@ class RedTeam:
312
324
  risk_category: Optional[RiskCategory] = None,
313
325
  application_scenario: Optional[str] = None,
314
326
  strategy: Optional[str] = None,
327
+ is_agent_target: Optional[bool] = None,
328
+ client_id: Optional[str] = None,
315
329
  ) -> List[str]:
316
330
  """Get attack objectives from the RAI client for a specific risk category or from a custom dataset.
317
331
 
@@ -327,6 +341,8 @@ class RedTeam:
327
341
  :type application_scenario: Optional[str]
328
342
  :param strategy: Optional attack strategy to get specific objectives for
329
343
  :type strategy: Optional[str]
344
+ :param is_agent_target: Optional boolean indicating if target is an agent (True) or model (False)
345
+ :type is_agent_target: Optional[bool]
330
346
  :return: A list of attack objective prompts
331
347
  :rtype: List[str]
332
348
  """
@@ -348,7 +364,39 @@ class RedTeam:
348
364
 
349
365
  # Check if custom attack seed prompts are provided in the generator
350
366
  if attack_objective_generator.custom_attack_seed_prompts and attack_objective_generator.validated_prompts:
351
- return await self._get_custom_attack_objectives(risk_cat_value, num_objectives, strategy, current_key)
367
+ # Check if this specific risk category has custom objectives
368
+ custom_objectives = attack_objective_generator.valid_prompts_by_category.get(risk_cat_value, [])
369
+
370
+ if custom_objectives:
371
+ # Use custom objectives for this risk category
372
+ return await self._get_custom_attack_objectives(risk_cat_value, num_objectives, strategy, current_key)
373
+ else:
374
+ # No custom objectives for this risk category, but risk_categories was specified
375
+ # Fetch from service if this risk category is in the requested list
376
+ if (
377
+ self.attack_objective_generator.risk_categories
378
+ and risk_category in self.attack_objective_generator.risk_categories
379
+ ):
380
+ self.logger.info(
381
+ f"No custom objectives found for risk category {risk_cat_value}, fetching from service"
382
+ )
383
+ return await self._get_rai_attack_objectives(
384
+ risk_category,
385
+ risk_cat_value,
386
+ application_scenario,
387
+ strategy,
388
+ baseline_objectives_exist,
389
+ baseline_key,
390
+ current_key,
391
+ num_objectives,
392
+ is_agent_target,
393
+ )
394
+ else:
395
+ # Risk category not in requested list, return empty
396
+ self.logger.warning(
397
+ f"No custom objectives found for risk category {risk_cat_value} and it's not in the requested risk categories"
398
+ )
399
+ return []
352
400
  else:
353
401
  return await self._get_rai_attack_objectives(
354
402
  risk_category,
@@ -359,6 +407,8 @@ class RedTeam:
359
407
  baseline_key,
360
408
  current_key,
361
409
  num_objectives,
410
+ is_agent_target,
411
+ client_id,
362
412
  )
363
413
 
364
414
  async def _get_custom_attack_objectives(
@@ -420,6 +470,8 @@ class RedTeam:
420
470
  baseline_key: tuple,
421
471
  current_key: tuple,
422
472
  num_objectives: int,
473
+ is_agent_target: Optional[bool] = None,
474
+ client_id: Optional[str] = None,
423
475
  ) -> List[str]:
424
476
  """Get attack objectives from the RAI service."""
425
477
  content_harm_risk = None
@@ -435,6 +487,8 @@ class RedTeam:
435
487
  )
436
488
 
437
489
  # Get objectives from RAI service
490
+ target_type_str = "agent" if is_agent_target else "model" if is_agent_target is not None else None
491
+
438
492
  if "tense" in strategy:
439
493
  objectives_response = await self.generated_rai_client.get_attack_objectives(
440
494
  risk_type=content_harm_risk,
@@ -443,6 +497,8 @@ class RedTeam:
443
497
  strategy="tense",
444
498
  language=self.language.value,
445
499
  scan_session_id=self.scan_session_id,
500
+ target=target_type_str,
501
+ client_id=client_id,
446
502
  )
447
503
  else:
448
504
  objectives_response = await self.generated_rai_client.get_attack_objectives(
@@ -452,11 +508,12 @@ class RedTeam:
452
508
  strategy=None,
453
509
  language=self.language.value,
454
510
  scan_session_id=self.scan_session_id,
511
+ target=target_type_str,
512
+ client_id=client_id,
455
513
  )
456
514
 
457
515
  if isinstance(objectives_response, list):
458
516
  self.logger.debug(f"API returned {len(objectives_response)} objectives")
459
-
460
517
  # Handle jailbreak strategy
461
518
  if strategy == "jailbreak":
462
519
  objectives_response = await self._apply_jailbreak_prefixes(objectives_response)
@@ -470,8 +527,62 @@ class RedTeam:
470
527
  if not objectives_response or (
471
528
  isinstance(objectives_response, dict) and not objectives_response.get("objectives")
472
529
  ):
473
- self.logger.warning("Empty or invalid response, returning empty list")
474
- return []
530
+ # If we got no agent objectives, fallback to model objectives
531
+ if is_agent_target:
532
+ self.logger.warning(
533
+ f"No agent-type attack objectives found for {risk_cat_value}. "
534
+ "Falling back to model-type objectives."
535
+ )
536
+ try:
537
+ # Retry with model target type
538
+ if "tense" in strategy:
539
+ objectives_response = await self.generated_rai_client.get_attack_objectives(
540
+ risk_type=content_harm_risk,
541
+ risk_category=other_risk,
542
+ application_scenario=application_scenario or "",
543
+ strategy="tense",
544
+ language=self.language.value,
545
+ scan_session_id=self.scan_session_id,
546
+ target="model",
547
+ client_id=client_id,
548
+ )
549
+ else:
550
+ objectives_response = await self.generated_rai_client.get_attack_objectives(
551
+ risk_type=content_harm_risk,
552
+ risk_category=other_risk,
553
+ application_scenario=application_scenario or "",
554
+ strategy=None,
555
+ language=self.language.value,
556
+ scan_session_id=self.scan_session_id,
557
+ target="model",
558
+ client_id=client_id,
559
+ )
560
+
561
+ if isinstance(objectives_response, list):
562
+ self.logger.debug(f"Fallback API returned {len(objectives_response)} model-type objectives")
563
+
564
+ # Apply strategy-specific transformations to fallback objectives
565
+ # Still try agent-type attack techniques (jailbreak/XPIA) even with model-type baseline objectives
566
+ if strategy == "jailbreak":
567
+ objectives_response = await self._apply_jailbreak_prefixes(objectives_response)
568
+ elif strategy == "indirect_jailbreak":
569
+ # Try agent-type XPIA first, will fallback to model-type XPIA within the method
570
+ objectives_response = await self._apply_xpia_prompts(objectives_response, "agent")
571
+
572
+ # Check if fallback response is also empty
573
+ if not objectives_response or (
574
+ isinstance(objectives_response, dict) and not objectives_response.get("objectives")
575
+ ):
576
+ self.logger.warning("Fallback to model-type objectives also returned empty list")
577
+ return []
578
+
579
+ except Exception as fallback_error:
580
+ self.logger.error(f"Error calling get_attack_objectives with model fallback: {str(fallback_error)}")
581
+ self.logger.warning("Fallback API call failed, returning empty objectives list")
582
+ return []
583
+ else:
584
+ self.logger.warning("Empty or invalid response, returning empty list")
585
+ return []
475
586
 
476
587
  # Filter and select objectives
477
588
  selected_cat_objectives = self._filter_and_select_objectives(
@@ -484,6 +595,163 @@ class RedTeam:
484
595
 
485
596
  return selected_prompts
486
597
 
598
+ async def _apply_xpia_prompts(self, objectives_list: List, target_type_str: str) -> List:
599
+ """Apply XPIA prompt formatting to objectives for indirect jailbreak strategy.
600
+
601
+ XPIA prompts are wrapper structures that contain:
602
+ - content: benign user query to trigger tool use
603
+ - context: attack vehicle with {attack_text} placeholder
604
+ - context_type: modality for formatting (email, document, html, code)
605
+ - tool_name: name for mock tool
606
+
607
+ We inject the baseline attack objectives into these XPIA wrapper prompts.
608
+ """
609
+ self.logger.debug(f"Applying XPIA prompts to objectives for indirect jailbreak (target_type={target_type_str})")
610
+
611
+ try:
612
+ # Fetch XPIA wrapper prompts from RAI service
613
+ @self.retry_manager.create_retry_decorator(context="xpia_prompts")
614
+ async def get_xpia_prompts_with_retry():
615
+ return await self.generated_rai_client.get_attack_objectives(
616
+ risk_type=None,
617
+ risk_category="xpia",
618
+ application_scenario="",
619
+ strategy=None,
620
+ language=self.language.value,
621
+ scan_session_id=self.scan_session_id,
622
+ target=target_type_str,
623
+ )
624
+
625
+ xpia_prompts = await get_xpia_prompts_with_retry()
626
+
627
+ # If no agent XPIA prompts and we're trying agent, fallback to model
628
+ if (not xpia_prompts or len(xpia_prompts) == 0) and target_type_str == "agent":
629
+ self.logger.debug("No agent-type XPIA prompts available, falling back to model-type XPIA prompts")
630
+ try:
631
+ xpia_prompts = await self.generated_rai_client.get_attack_objectives(
632
+ risk_type=None,
633
+ risk_category="xpia",
634
+ application_scenario="",
635
+ strategy=None,
636
+ language=self.language.value,
637
+ scan_session_id=self.scan_session_id,
638
+ target="model",
639
+ )
640
+ if xpia_prompts and len(xpia_prompts) > 0:
641
+ self.logger.debug(f"Fetched {len(xpia_prompts)} model-type XPIA wrapper prompts as fallback")
642
+ except Exception as fallback_error:
643
+ self.logger.error(f"Error fetching model-type XPIA prompts as fallback: {str(fallback_error)}")
644
+
645
+ if not xpia_prompts or len(xpia_prompts) == 0:
646
+ self.logger.warning("No XPIA prompts available (even after fallback), returning objectives unchanged")
647
+ return objectives_list
648
+
649
+ self.logger.debug(f"Fetched {len(xpia_prompts)} XPIA wrapper prompts")
650
+
651
+ # Apply XPIA wrapping to each baseline objective
652
+ for objective in objectives_list:
653
+ if "messages" in objective and len(objective["messages"]) > 0:
654
+ message = objective["messages"][0]
655
+ if isinstance(message, dict) and "content" in message:
656
+ # Get the baseline attack content to inject
657
+ baseline_attack_content = message["content"]
658
+ # Preserve the original baseline context if it exists
659
+ baseline_context = message.get("context", "")
660
+
661
+ # Normalize baseline_context to a list of context dicts
662
+ baseline_contexts = []
663
+ if baseline_context:
664
+ # Extract baseline context from RAI service format
665
+ context_dict = {"content": baseline_context}
666
+ if message.get("tool_name"):
667
+ context_dict["tool_name"] = message["tool_name"]
668
+ if message.get("context_type"):
669
+ context_dict["context_type"] = message["context_type"]
670
+ baseline_contexts = [context_dict]
671
+
672
+ # Check if baseline contexts have agent fields (context_type, tool_name)
673
+ baseline_contexts_with_agent_fields = []
674
+ baseline_contexts_without_agent_fields = []
675
+
676
+ for ctx in baseline_contexts:
677
+ if isinstance(ctx, dict):
678
+ if "context_type" in ctx or "tool_name" in ctx:
679
+ # This baseline context has agent fields - preserve it separately
680
+ baseline_contexts_with_agent_fields.append(ctx)
681
+ self.logger.debug(
682
+ f"Found baseline context with agent fields: tool_name={ctx.get('tool_name')}, context_type={ctx.get('context_type')}"
683
+ )
684
+ else:
685
+ # This baseline context has no agent fields - can be embedded
686
+ baseline_contexts_without_agent_fields.append(ctx)
687
+ else:
688
+ baseline_contexts_without_agent_fields.append({"content": str(ctx)})
689
+
690
+ # For baseline contexts without agent fields, embed them in the attack content
691
+ if baseline_contexts_without_agent_fields:
692
+ context_texts = [
693
+ ctx.get("content", "")
694
+ for ctx in baseline_contexts_without_agent_fields
695
+ if ctx.get("content")
696
+ ]
697
+ if context_texts:
698
+ combined_context = "\n\n".join(context_texts)
699
+ baseline_attack_content = f"{baseline_attack_content}\n\nContext:\n{combined_context}"
700
+ self.logger.debug(
701
+ f"Embedded {len(context_texts)} baseline context(s) without agent fields into attack content"
702
+ )
703
+
704
+ # Randomly select an XPIA wrapper prompt
705
+ xpia_prompt = random.choice(xpia_prompts)
706
+ xpia_message = xpia_prompt.get("messages", [{}])[0]
707
+
708
+ # Extract XPIA wrapper structure
709
+ user_query = xpia_message.get("content", "")
710
+ attack_vehicle_context = xpia_message.get("context", "")
711
+ context_type = xpia_message.get("context_type") or "text"
712
+ tool_name = xpia_message.get("tool_name", "")
713
+
714
+ # Inject baseline attack (now with appended context) into the {attack_text} placeholder
715
+ if "{attack_text}" in attack_vehicle_context:
716
+ injected_context = attack_vehicle_context.replace("{attack_text}", baseline_attack_content)
717
+ else:
718
+ # No placeholder found, append to end
719
+ injected_context = f"{attack_vehicle_context}\n\n{baseline_attack_content}"
720
+
721
+ # Apply modality-based formatting
722
+ formatted_context = format_content_by_modality(injected_context, context_type)
723
+
724
+ # Update the message with benign user query
725
+ message["content"] = user_query
726
+
727
+ # Build the contexts list: XPIA context + any baseline contexts with agent fields
728
+ contexts = [
729
+ {"content": formatted_context, "context_type": context_type, "tool_name": tool_name}
730
+ ]
731
+
732
+ # Add baseline contexts with agent fields as separate context entries
733
+ if baseline_contexts_with_agent_fields:
734
+ contexts.extend(baseline_contexts_with_agent_fields)
735
+ self.logger.debug(
736
+ f"Preserved {len(baseline_contexts_with_agent_fields)} baseline context(s) with agent fields"
737
+ )
738
+
739
+ message["context"] = contexts
740
+ message["context_type"] = (
741
+ context_type # Keep at message level for backward compat (XPIA primary)
742
+ )
743
+ message["tool_name"] = tool_name
744
+
745
+ self.logger.debug(
746
+ f"Wrapped baseline attack in XPIA: total contexts={len(contexts)}, xpia_tool={tool_name}, xpia_type={context_type}"
747
+ )
748
+
749
+ except Exception as e:
750
+ self.logger.error(f"Error applying XPIA prompts: {str(e)}")
751
+ self.logger.warning("XPIA prompt application failed, returning original objectives")
752
+
753
+ return objectives_list
754
+
487
755
  async def _apply_jailbreak_prefixes(self, objectives_list: List) -> List:
488
756
  """Apply jailbreak prefixes to objectives."""
489
757
  self.logger.debug("Applying jailbreak prefixes to objectives")
@@ -521,10 +789,52 @@ class RedTeam:
521
789
 
522
790
  if baseline_objective_ids:
523
791
  self.logger.debug(f"Filtering by {len(baseline_objective_ids)} baseline objective IDs for {strategy}")
524
- selected_cat_objectives = [
525
- obj for obj in objectives_response if obj.get("id") in baseline_objective_ids
526
- ]
527
- self.logger.debug(f"Found {len(selected_cat_objectives)} matching objectives with baseline IDs")
792
+ # Filter by baseline IDs
793
+ filtered_objectives = [obj for obj in objectives_response if obj.get("id") in baseline_objective_ids]
794
+ self.logger.debug(f"Found {len(filtered_objectives)} matching objectives with baseline IDs")
795
+
796
+ # For strategies like indirect_jailbreak, the RAI service may return multiple
797
+ # objectives per baseline ID (e.g., multiple XPIA variations for one baseline objective).
798
+ # We should select num_objectives total, ensuring each baseline objective gets an XPIA attack.
799
+ # Group by baseline ID and select one objective per baseline ID up to num_objectives.
800
+ selected_by_id = {}
801
+ for obj in filtered_objectives:
802
+ obj_id = obj.get("id")
803
+ if obj_id not in selected_by_id:
804
+ selected_by_id[obj_id] = []
805
+ selected_by_id[obj_id].append(obj)
806
+
807
+ # Select objectives to match num_objectives
808
+ selected_cat_objectives = []
809
+ baseline_ids = list(selected_by_id.keys())
810
+
811
+ # If we have enough baseline IDs to cover num_objectives, select one per baseline ID
812
+ if len(baseline_ids) >= num_objectives:
813
+ # Select from the first num_objectives baseline IDs
814
+ for i in range(num_objectives):
815
+ obj_id = baseline_ids[i]
816
+ selected_cat_objectives.append(random.choice(selected_by_id[obj_id]))
817
+ else:
818
+ # If we have fewer baseline IDs than num_objectives, select all and cycle through
819
+ for i in range(num_objectives):
820
+ obj_id = baseline_ids[i % len(baseline_ids)]
821
+ # For repeated IDs, try to select different variations if available
822
+ available_variations = selected_by_id[obj_id].copy()
823
+ # Remove already selected variations for this baseline ID
824
+ already_selected = [obj for obj in selected_cat_objectives if obj.get("id") == obj_id]
825
+ for selected_obj in already_selected:
826
+ if selected_obj in available_variations:
827
+ available_variations.remove(selected_obj)
828
+
829
+ if available_variations:
830
+ selected_cat_objectives.append(random.choice(available_variations))
831
+ else:
832
+ # If no more variations, reuse one (shouldn't happen with proper XPIA generation)
833
+ selected_cat_objectives.append(random.choice(selected_by_id[obj_id]))
834
+
835
+ self.logger.debug(
836
+ f"Selected {len(selected_cat_objectives)} objectives from {len(baseline_ids)} baseline IDs and {len(filtered_objectives)} total variations for {strategy} strategy"
837
+ )
528
838
  else:
529
839
  self.logger.warning("No baseline objective IDs found, using random selection")
530
840
  selected_cat_objectives = random.sample(
@@ -543,17 +853,87 @@ class RedTeam:
543
853
  return selected_cat_objectives
544
854
 
545
855
  def _extract_objective_content(self, selected_objectives: List) -> List[str]:
546
- """Extract content from selected objectives."""
856
+ """Extract content from selected objectives and build prompt-to-context mapping."""
547
857
  selected_prompts = []
548
858
  for obj in selected_objectives:
859
+ risk_subtype = None
860
+ # Extract risk-subtype from target_harms if present
861
+ target_harms = obj.get("metadata", {}).get("target_harms", [])
862
+ if target_harms and isinstance(target_harms, list):
863
+ for harm in target_harms:
864
+ if isinstance(harm, dict) and "risk-subtype" in harm:
865
+ subtype_value = harm.get("risk-subtype")
866
+ if subtype_value:
867
+ risk_subtype = subtype_value
868
+ break
549
869
  if "messages" in obj and len(obj["messages"]) > 0:
550
870
  message = obj["messages"][0]
551
871
  if isinstance(message, dict) and "content" in message:
552
872
  content = message["content"]
553
- context = message.get("context", "")
873
+ context_raw = message.get("context", "")
874
+ # TODO is first if necessary?
875
+ # Normalize context to always be a list of dicts with 'content' key
876
+ if isinstance(context_raw, list):
877
+ # Already a list - ensure each item is a dict with 'content' key
878
+ contexts = []
879
+ for ctx in context_raw:
880
+ if isinstance(ctx, dict) and "content" in ctx:
881
+ # Preserve all keys including context_type, tool_name if present
882
+ contexts.append(ctx)
883
+ elif isinstance(ctx, str):
884
+ contexts.append({"content": ctx})
885
+ elif context_raw:
886
+ # Single string value - wrap in dict
887
+ contexts = [{"content": context_raw}]
888
+ if message.get("tool_name"):
889
+ contexts[0]["tool_name"] = message["tool_name"]
890
+ if message.get("context_type"):
891
+ contexts[0]["context_type"] = message["context_type"]
892
+ else:
893
+ contexts = []
894
+
895
+ # Check if any context has agent-specific fields
896
+ has_agent_fields = any(
897
+ isinstance(ctx, dict) and ("context_type" in ctx or "tool_name" in ctx) for ctx in contexts
898
+ )
899
+
900
+ # For contexts without agent fields, append them to the content
901
+ # This applies to baseline and any other attack objectives with plain context
902
+ if contexts and not has_agent_fields:
903
+ # Extract all context content and append to the attack content
904
+ context_texts = []
905
+ for ctx in contexts:
906
+ if isinstance(ctx, dict):
907
+ ctx_content = ctx.get("content", "")
908
+ if ctx_content:
909
+ context_texts.append(ctx_content)
910
+
911
+ if context_texts:
912
+ # Append context to content
913
+ combined_context = "\n\n".join(context_texts)
914
+ content = f"{content}\n\nContext:\n{combined_context}"
915
+ self.logger.debug(
916
+ f"Appended {len(context_texts)} context source(s) to attack content (total context length={len(combined_context)})"
917
+ )
918
+
554
919
  selected_prompts.append(content)
555
- # Store mapping of content to context for later evaluation
556
- self.prompt_to_context[content] = context
920
+
921
+ # Store risk_subtype mapping if it exists
922
+ if risk_subtype:
923
+ self.prompt_to_risk_subtype[content] = risk_subtype
924
+
925
+ # Always store contexts if they exist (whether or not they have agent fields)
926
+ if contexts:
927
+ context_dict = {"contexts": contexts}
928
+ if has_agent_fields:
929
+ self.logger.debug(f"Stored context with agent fields: {len(contexts)} context source(s)")
930
+ else:
931
+ self.logger.debug(
932
+ f"Stored context without agent fields: {len(contexts)} context source(s) (also embedded in content)"
933
+ )
934
+ self.prompt_to_context[content] = context_dict
935
+ else:
936
+ self.logger.debug(f"No context to store")
557
937
  return selected_prompts
558
938
 
559
939
  def _cache_attack_objectives(
@@ -573,6 +953,18 @@ class RedTeam:
573
953
  target_harms = obj.get("metadata", {}).get("target_harms", [])
574
954
  content = ""
575
955
  context = ""
956
+ risk_subtype = None
957
+
958
+ # Extract risk-subtype from target_harms if present
959
+ if target_harms and isinstance(target_harms, list):
960
+ for harm in target_harms:
961
+ if isinstance(harm, dict) and "risk-subtype" in harm:
962
+ subtype_value = harm.get("risk-subtype")
963
+ # Only store non-empty risk-subtype values
964
+ if subtype_value:
965
+ risk_subtype = subtype_value
966
+ break # Use the first non-empty risk-subtype found
967
+
576
968
  if "messages" in obj and len(obj["messages"]) > 0:
577
969
 
578
970
  message = obj["messages"][0]
@@ -580,6 +972,9 @@ class RedTeam:
580
972
  context = message.get("context", "")
581
973
  if content:
582
974
  obj_data = {"id": obj_id, "content": content, "context": context}
975
+ # Add risk_subtype to obj_data if it exists
976
+ if risk_subtype:
977
+ obj_data["risk_subtype"] = risk_subtype
583
978
  objectives_by_category[risk_cat_value].append(obj_data)
584
979
 
585
980
  self.attack_objectives[current_key] = {
@@ -783,6 +1178,16 @@ class RedTeam:
783
1178
  :rtype: RedTeamResult
784
1179
  """
785
1180
  user_agent: Optional[str] = kwargs.get("user_agent", "(type=redteam; subtype=RedTeam)")
1181
+ run_id_override = kwargs.get("run_id") or kwargs.get("runId")
1182
+ eval_id_override = kwargs.get("eval_id") or kwargs.get("evalId")
1183
+ created_at_override = kwargs.get("created_at") or kwargs.get("createdAt")
1184
+ taxonomy_risk_categories = kwargs.get("taxonomy_risk_categories") # key is risk category value is taxonomy
1185
+ _app_insights_configuration = kwargs.get("_app_insights_configuration")
1186
+ self._app_insights_configuration = _app_insights_configuration
1187
+ self.taxonomy_risk_categories = taxonomy_risk_categories or {}
1188
+ is_agent_target: Optional[bool] = kwargs.get("is_agent_target", False)
1189
+ client_id: Optional[str] = kwargs.get("client_id")
1190
+
786
1191
  with UserAgentSingleton().add_useragent_product(user_agent):
787
1192
  # Initialize scan
788
1193
  self._initialize_scan(scan_name, application_scenario)
@@ -802,6 +1207,12 @@ class RedTeam:
802
1207
  self.mlflow_integration.logger = self.logger
803
1208
  self.result_processor.logger = self.logger
804
1209
 
1210
+ self.mlflow_integration.set_run_identity_overrides(
1211
+ run_id=run_id_override,
1212
+ eval_id=eval_id_override,
1213
+ created_at=created_at_override,
1214
+ )
1215
+
805
1216
  # Validate attack objective generator
806
1217
  if not self.attack_objective_generator:
807
1218
  raise EvaluationException(
@@ -825,6 +1236,19 @@ class RedTeam:
825
1236
  self.risk_categories = self.attack_objective_generator.risk_categories
826
1237
  self.result_processor.risk_categories = self.risk_categories
827
1238
 
1239
+ # Validate risk categories for target type
1240
+ if not is_agent_target:
1241
+ # Check if any agent-only risk categories are used with model targets
1242
+ for risk_cat in self.risk_categories:
1243
+ if risk_cat == RiskCategory.SensitiveDataLeakage:
1244
+ raise EvaluationException(
1245
+ message=f"Risk category '{risk_cat.value}' is only available for agent targets",
1246
+ internal_message=f"Risk category {risk_cat.value} requires agent target",
1247
+ target=ErrorTarget.RED_TEAM,
1248
+ category=ErrorCategory.INVALID_VALUE,
1249
+ blame=ErrorBlame.USER_ERROR,
1250
+ )
1251
+
828
1252
  # Show risk categories to user
829
1253
  tqdm.write(f"📊 Risk categories: {[rc.value for rc in self.risk_categories]}")
830
1254
  self.logger.info(f"Risk categories to process: {[rc.value for rc in self.risk_categories]}")
@@ -853,9 +1277,11 @@ class RedTeam:
853
1277
  self._initialize_tracking_dict(flattened_attack_strategies)
854
1278
 
855
1279
  # Fetch attack objectives
856
- all_objectives = await self._fetch_all_objectives(flattened_attack_strategies, application_scenario)
1280
+ all_objectives = await self._fetch_all_objectives(
1281
+ flattened_attack_strategies, application_scenario, is_agent_target, client_id
1282
+ )
857
1283
 
858
- chat_target = get_chat_target(target, self.prompt_to_context)
1284
+ chat_target = get_chat_target(target)
859
1285
  self.chat_target = chat_target
860
1286
 
861
1287
  # Execute attacks
@@ -872,7 +1298,7 @@ class RedTeam:
872
1298
  )
873
1299
 
874
1300
  # Process and return results
875
- return await self._finalize_results(skip_upload, skip_evals, eval_run, output_path)
1301
+ return await self._finalize_results(skip_upload, skip_evals, eval_run, output_path, scan_name)
876
1302
 
877
1303
  def _initialize_scan(self, scan_name: Optional[str], application_scenario: Optional[str]):
878
1304
  """Initialize scan-specific variables."""
@@ -944,11 +1370,10 @@ class RedTeam:
944
1370
  )
945
1371
  raise ValueError("MultiTurn and Crescendo strategies are not compatible with multiple attack strategies.")
946
1372
  if AttackStrategy.Tense in flattened_attack_strategies and (
947
- RiskCategory.IndirectAttack in self.risk_categories
948
- or RiskCategory.UngroundedAttributes in self.risk_categories
1373
+ RiskCategory.UngroundedAttributes in self.risk_categories
949
1374
  ):
950
1375
  self.logger.warning(
951
- "Tense strategy is not compatible with IndirectAttack or UngroundedAttributes risk categories. Skipping Tense strategy."
1376
+ "Tense strategy is not compatible with UngroundedAttributes risk categories. Skipping Tense strategy."
952
1377
  )
953
1378
  raise ValueError(
954
1379
  "Tense strategy is not compatible with IndirectAttack or UngroundedAttributes risk categories."
@@ -968,7 +1393,13 @@ class RedTeam:
968
1393
  "status": TASK_STATUS["PENDING"],
969
1394
  }
970
1395
 
971
- async def _fetch_all_objectives(self, flattened_attack_strategies: List, application_scenario: str) -> Dict:
1396
+ async def _fetch_all_objectives(
1397
+ self,
1398
+ flattened_attack_strategies: List,
1399
+ application_scenario: str,
1400
+ is_agent_target: bool,
1401
+ client_id: Optional[str] = None,
1402
+ ) -> Dict:
972
1403
  """Fetch all attack objectives for all strategies and risk categories."""
973
1404
  log_section_header(self.logger, "Fetching attack objectives")
974
1405
  all_objectives = {}
@@ -980,6 +1411,8 @@ class RedTeam:
980
1411
  risk_category=risk_category,
981
1412
  application_scenario=application_scenario,
982
1413
  strategy="baseline",
1414
+ is_agent_target=is_agent_target,
1415
+ client_id=client_id,
983
1416
  )
984
1417
  if "baseline" not in all_objectives:
985
1418
  all_objectives["baseline"] = {}
@@ -1003,6 +1436,8 @@ class RedTeam:
1003
1436
  risk_category=risk_category,
1004
1437
  application_scenario=application_scenario,
1005
1438
  strategy=strategy_name,
1439
+ is_agent_target=is_agent_target,
1440
+ client_id=client_id,
1006
1441
  )
1007
1442
  all_objectives[strategy_name][risk_category.value] = objectives
1008
1443
 
@@ -1104,47 +1539,78 @@ class RedTeam:
1104
1539
  self.logger.error(f"Error processing task {i+1}: {str(e)}")
1105
1540
  continue
1106
1541
 
1107
- async def _finalize_results(self, skip_upload: bool, skip_evals: bool, eval_run, output_path: str) -> RedTeamResult:
1542
+ async def _finalize_results(
1543
+ self, skip_upload: bool, skip_evals: bool, eval_run, output_path: str, scan_name: str
1544
+ ) -> RedTeamResult:
1108
1545
  """Process and finalize scan results."""
1109
1546
  log_section_header(self.logger, "Processing results")
1110
1547
 
1111
- # Convert results to RedTeamResult
1112
- red_team_result = self.result_processor.to_red_team_result(self.red_team_info)
1113
-
1114
- output = RedTeamResult(
1115
- scan_result=red_team_result,
1116
- attack_details=red_team_result["attack_details"],
1548
+ # Convert results to RedTeamResult (now builds AOAI summary internally)
1549
+ red_team_result = self.result_processor.to_red_team_result(
1550
+ red_team_info=self.red_team_info,
1551
+ eval_run=eval_run,
1552
+ scan_name=scan_name,
1117
1553
  )
1118
1554
 
1555
+ # Extract AOAI summary for passing to MLflow logging
1556
+ aoai_summary = red_team_result.scan_result.get("AOAI_Compatible_Summary")
1557
+ if self._app_insights_configuration:
1558
+ emit_eval_result_events_to_app_insights(
1559
+ self._app_insights_configuration, aoai_summary["output_items"]["data"]
1560
+ )
1119
1561
  # Log results to MLFlow if not skipping upload
1120
1562
  if not skip_upload:
1121
1563
  self.logger.info("Logging results to AI Foundry")
1122
1564
  await self.mlflow_integration.log_redteam_results_to_mlflow(
1123
- redteam_result=output, eval_run=eval_run, red_team_info=self.red_team_info, _skip_evals=skip_evals
1565
+ redteam_result=red_team_result,
1566
+ eval_run=eval_run,
1567
+ red_team_info=self.red_team_info,
1568
+ _skip_evals=skip_evals,
1569
+ aoai_summary=aoai_summary,
1124
1570
  )
1125
-
1126
1571
  # Write output to specified path
1127
- if output_path and output.scan_result:
1572
+ if output_path and red_team_result.scan_result:
1128
1573
  abs_output_path = output_path if os.path.isabs(output_path) else os.path.abspath(output_path)
1129
1574
  self.logger.info(f"Writing output to {abs_output_path}")
1130
- _write_output(abs_output_path, output.scan_result)
1575
+
1576
+ # Ensure output_path is treated as a directory
1577
+ # If it exists as a file, remove it first
1578
+ if os.path.exists(abs_output_path) and not os.path.isdir(abs_output_path):
1579
+ os.remove(abs_output_path)
1580
+ os.makedirs(abs_output_path, exist_ok=True)
1581
+
1582
+ # Create a copy of scan_result without AOAI properties for eval_result.json
1583
+ scan_result_without_aoai = {
1584
+ key: value
1585
+ for key, value in red_team_result.scan_result.items()
1586
+ if not key.startswith("AOAI_Compatible")
1587
+ }
1588
+
1589
+ # Write scan result without AOAI properties to eval_result.json
1590
+ _write_output(abs_output_path, scan_result_without_aoai)
1591
+
1592
+ # Write the AOAI summary to results.json
1593
+ if aoai_summary:
1594
+ _write_output(os.path.join(abs_output_path, "results.json"), aoai_summary)
1595
+ else:
1596
+ self.logger.warning("AOAI summary not available for output_path write")
1131
1597
 
1132
1598
  # Also save a copy to the scan output directory if available
1133
1599
  if self.scan_output_dir:
1134
1600
  final_output = os.path.join(self.scan_output_dir, "final_results.json")
1135
- _write_output(final_output, output.scan_result)
1136
- elif output.scan_result and self.scan_output_dir:
1601
+ _write_output(final_output, red_team_result.scan_result)
1602
+ elif red_team_result.scan_result and self.scan_output_dir:
1137
1603
  # If no output_path was specified but we have scan_output_dir, save there
1138
1604
  final_output = os.path.join(self.scan_output_dir, "final_results.json")
1139
- _write_output(final_output, output.scan_result)
1605
+ _write_output(final_output, red_team_result.scan_result)
1140
1606
 
1141
1607
  # Display final scorecard and results
1142
- if output.scan_result:
1143
- scorecard = format_scorecard(output.scan_result)
1608
+ if red_team_result.scan_result:
1609
+ scorecard = format_scorecard(red_team_result.scan_result)
1144
1610
  tqdm.write(scorecard)
1145
1611
 
1146
1612
  # Print URL for detailed results
1147
- studio_url = output.scan_result.get("studio_url", "")
1613
+ studio_url = red_team_result.scan_result.get("studio_url", "")
1148
1614
  if studio_url:
1149
1615
  tqdm.write(f"\nDetailed results available at:\n{studio_url}")
1150
1616
 
@@ -1161,4 +1627,4 @@ class RedTeam:
1161
1627
  handler.close()
1162
1628
  self.logger.removeHandler(handler)
1163
1629
 
1164
- return output
1630
+ return red_team_result