azure-ai-evaluation 1.11.2__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. azure/ai/evaluation/__init__.py +2 -0
  2. azure/ai/evaluation/_aoai/aoai_grader.py +69 -28
  3. azure/ai/evaluation/_aoai/label_grader.py +14 -13
  4. azure/ai/evaluation/_aoai/python_grader.py +15 -13
  5. azure/ai/evaluation/_aoai/score_model_grader.py +13 -10
  6. azure/ai/evaluation/_aoai/string_check_grader.py +13 -13
  7. azure/ai/evaluation/_aoai/text_similarity_grader.py +16 -25
  8. azure/ai/evaluation/_common/__init__.py +2 -1
  9. azure/ai/evaluation/_common/constants.py +109 -0
  10. azure/ai/evaluation/_common/evaluation_onedp_client.py +5 -5
  11. azure/ai/evaluation/_common/onedp/__init__.py +2 -2
  12. azure/ai/evaluation/_common/onedp/_client.py +44 -14
  13. azure/ai/evaluation/_common/onedp/_configuration.py +9 -7
  14. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1 -1
  15. azure/ai/evaluation/_common/onedp/_validation.py +18 -2
  16. azure/ai/evaluation/_common/onedp/aio/__init__.py +2 -2
  17. azure/ai/evaluation/_common/onedp/aio/_client.py +44 -14
  18. azure/ai/evaluation/_common/onedp/aio/_configuration.py +9 -7
  19. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +12 -0
  20. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +3942 -1631
  21. azure/ai/evaluation/_common/onedp/models/__init__.py +196 -6
  22. azure/ai/evaluation/_common/onedp/models/_enums.py +217 -0
  23. azure/ai/evaluation/_common/onedp/models/_models.py +3876 -603
  24. azure/ai/evaluation/_common/onedp/operations/__init__.py +12 -0
  25. azure/ai/evaluation/_common/onedp/operations/_operations.py +5422 -2577
  26. azure/ai/evaluation/_common/rai_service.py +299 -2
  27. azure/ai/evaluation/_common/utils.py +173 -39
  28. azure/ai/evaluation/_constants.py +100 -0
  29. azure/ai/evaluation/_eval_mapping.py +10 -0
  30. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +10 -0
  31. azure/ai/evaluation/_evaluate/_evaluate.py +1125 -9
  32. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +330 -51
  33. azure/ai/evaluation/_evaluate/_utils.py +17 -6
  34. azure/ai/evaluation/_evaluator_definition.py +76 -0
  35. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -0
  36. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -17
  37. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +7 -1
  38. azure/ai/evaluation/_evaluators/_common/_base_eval.py +80 -4
  39. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +181 -3
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +7 -1
  41. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +28 -13
  42. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  43. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +23 -4
  44. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -7
  45. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +36 -19
  46. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +43 -20
  47. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +7 -1
  48. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +7 -1
  49. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +32 -6
  50. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  51. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  52. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  53. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  54. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  55. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +23 -127
  56. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  57. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  58. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  59. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  60. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  61. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  62. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  64. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  65. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  66. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  67. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  68. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -19
  69. azure/ai/evaluation/_exceptions.py +6 -0
  70. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +14 -1
  71. azure/ai/evaluation/_legacy/prompty/_prompty.py +2 -1
  72. azure/ai/evaluation/_legacy/prompty/_utils.py +54 -6
  73. azure/ai/evaluation/_model_configurations.py +26 -0
  74. azure/ai/evaluation/_version.py +1 -1
  75. azure/ai/evaluation/red_team/_attack_objective_generator.py +3 -1
  76. azure/ai/evaluation/red_team/_attack_strategy.py +1 -0
  77. azure/ai/evaluation/red_team/_callback_chat_target.py +45 -14
  78. azure/ai/evaluation/red_team/_evaluation_processor.py +129 -12
  79. azure/ai/evaluation/red_team/_mlflow_integration.py +144 -36
  80. azure/ai/evaluation/red_team/_orchestrator_manager.py +309 -51
  81. azure/ai/evaluation/red_team/_red_team.py +503 -37
  82. azure/ai/evaluation/red_team/_red_team_result.py +264 -15
  83. azure/ai/evaluation/red_team/_result_processor.py +953 -31
  84. azure/ai/evaluation/red_team/_utils/constants.py +1 -0
  85. azure/ai/evaluation/red_team/_utils/formatting_utils.py +126 -25
  86. azure/ai/evaluation/red_team/_utils/metric_mapping.py +10 -7
  87. azure/ai/evaluation/red_team/_utils/strategy_utils.py +3 -25
  88. azure/ai/evaluation/simulator/_adversarial_simulator.py +1 -1
  89. azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  90. azure/ai/evaluation/simulator/_conversation/_conversation.py +1 -1
  91. azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
  92. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -1
  93. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +25 -2
  94. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -0
  95. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +1 -1
  96. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  97. azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
  98. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/METADATA +44 -10
  99. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/RECORD +102 -84
  100. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/WHEEL +0 -0
  101. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/licenses/NOTICE.txt +0 -0
  102. {azure_ai_evaluation-1.11.2.dist-info → azure_ai_evaluation-1.13.0.dist-info}/top_level.txt +0 -0
@@ -24,6 +24,9 @@ from pyrit.orchestrator import Orchestrator
 from pyrit.prompt_converter import PromptConverter
 from pyrit.prompt_target import PromptChatTarget
 
+# Local imports
+from ._callback_chat_target import _CallbackChatTarget
+
 # Retry imports
 import httpx
 import httpcore
@@ -93,6 +96,7 @@ class OrchestratorManager:
         one_dp_project,
         retry_config,
         scan_output_dir=None,
+        red_team=None,
     ):
         """Initialize the orchestrator manager.
 
@@ -103,6 +107,7 @@ class OrchestratorManager:
         :param one_dp_project: Whether this is a OneDP project
         :param retry_config: Retry configuration for network errors
         :param scan_output_dir: Directory for scan outputs
+        :param red_team: Reference to RedTeam instance for accessing prompt mappings
         """
         self.logger = logger
         self.generated_rai_client = generated_rai_client
@@ -111,6 +116,7 @@ class OrchestratorManager:
         self._one_dp_project = one_dp_project
         self.retry_config = retry_config
         self.scan_output_dir = scan_output_dir
+        self.red_team = red_team
 
     def _calculate_timeout(self, base_timeout: int, orchestrator_type: str) -> int:
         """Calculate appropriate timeout based on orchestrator type.
@@ -192,6 +198,8 @@ class OrchestratorManager:
         :type red_team_info: Dict
         :param task_statuses: Dictionary to track task statuses
         :type task_statuses: Dict
+        :param prompt_to_context: Dictionary mapping prompts to their contexts (string or dict format)
+        :type prompt_to_context: Dict[str, Union[str, Dict]]
         :return: Configured and initialized orchestrator
         :rtype: Orchestrator
         """
@@ -238,52 +246,140 @@ class OrchestratorManager:
         if red_team_info:
             red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
 
-        # Process all prompts at once
+        # Process prompts one at a time like multi-turn and crescendo orchestrators
         self.logger.debug(f"Processing {len(all_prompts)} prompts for {strategy_name}/{risk_category_name}")
-        start_time = datetime.now()
 
         # Calculate appropriate timeout for single-turn orchestrator
         calculated_timeout = self._calculate_timeout(timeout, "single")
 
-        try:
-            # Create retry-enabled function using the reusable decorator
-            @network_retry_decorator(self.retry_config, self.logger, strategy_name, risk_category_name)
-            async def send_all_with_retry():
-                return await asyncio.wait_for(
-                    orchestrator.send_prompts_async(
-                        prompt_list=all_prompts,
-                        memory_labels={
-                            "risk_strategy_path": output_path,
-                            "batch": 1,
-                        },
-                    ),
-                    timeout=calculated_timeout,
-                )
+        for prompt_idx, prompt in enumerate(all_prompts):
+            prompt_start_time = datetime.now()
+            self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
 
-            # Execute the retry-enabled function
-            await send_all_with_retry()
-            duration = (datetime.now() - start_time).total_seconds()
-            self.logger.debug(
-                f"Successfully processed all prompts for {strategy_name}/{risk_category_name} in {duration:.2f} seconds"
-            )
-        except (asyncio.TimeoutError, tenacity.RetryError):
-            self.logger.warning(
-                f"Prompt processing for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with partial results"
+            # Get context for this prompt
+            context_data = prompt_to_context.get(prompt, {}) if prompt_to_context else {}
+
+            # Normalize context_data: handle both string (legacy) and dict formats
+            # If context_data is a string, convert it to the expected dict format
+            if isinstance(context_data, str):
+                context_data = {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
+
+            # context_data is now always a dict with a 'contexts' list
+            # Each item in contexts is a dict with 'content' key
+            # context_type and tool_name can be present per-context
+            contexts = context_data.get("contexts", [])
+
+            # Check if any context has agent-specific fields (context_type, tool_name)
+            has_agent_fields = any(
+                isinstance(ctx, dict) and ("context_type" in ctx or "tool_name" in ctx) for ctx in contexts
             )
-            print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}")
-            if task_statuses:
-                task_statuses[task_key] = TASK_STATUS["TIMEOUT"]
-            if red_team_info:
-                red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
-        except Exception as e:
-            log_error(
-                self.logger,
-                "Error processing prompts",
-                e,
-                f"{strategy_name}/{risk_category_name}",
+
+            # Build context_dict to pass via memory labels
+            context_dict = {"contexts": contexts}
+
+            # Get risk_sub_type for this prompt if it exists
+            risk_sub_type = (
+                self.red_team.prompt_to_risk_subtype.get(prompt)
+                if self.red_team and hasattr(self.red_team, "prompt_to_risk_subtype")
+                else None
             )
-            if red_team_info:
-                red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+
+            # Initialize processed_prompt with the original prompt as default
+            processed_prompt = prompt
+
+            # Determine how to handle the prompt based on target type and context fields
+            if isinstance(chat_target, _CallbackChatTarget):
+                # CallbackChatTarget: Always pass contexts via context_dict, embed in prompt content
+                if contexts and not has_agent_fields:
+                    # For contexts without agent fields, the prompt already has context embedded
+                    # (done in _extract_objective_content), so just use it as-is
+                    processed_prompt = prompt
+                    self.logger.debug(
+                        f"CallbackChatTarget: Prompt has embedded context, passing {len(contexts)} context source(s) in context_dict"
+                    )
+                else:
+                    # Agent fields present - prompt is clean, contexts have structure
+                    processed_prompt = prompt
+                    tool_names = [
+                        ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                    ]
+                    self.logger.debug(
+                        f"CallbackChatTarget: Passing {len(contexts)} structured context(s) with agent fields, tool_names={tool_names}"
+                    )
+            else:
+                # Non-CallbackChatTarget: Embed contexts in the actual PyRIT message
+                if has_agent_fields:
+                    # Agent target with structured context - don't embed in prompt
+                    processed_prompt = prompt
+                    tool_names = [
+                        ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                    ]
+                    self.logger.debug(
+                        f"Non-CallbackChatTarget with agent fields: {len(contexts)} context source(s), tool_names={tool_names}"
+                    )
+                elif contexts:
+                    # Model target without agent fields - embed context in prompt
+                    # Note: The prompt already has context embedded from _extract_objective_content
+                    # But for non-CallbackChatTarget, we may need additional wrapping
+                    processed_prompt = prompt
+                    self.logger.debug(f"Non-CallbackChatTarget: Using prompt with embedded context")
+
+            try:
+                # Create retry-enabled function using the reusable decorator
+                @network_retry_decorator(
+                    self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
+                )
+                async def send_prompt_with_retry():
+                    memory_labels = {
+                        "risk_strategy_path": output_path,
+                        "batch": prompt_idx + 1,
+                        "context": context_dict,
+                    }
+                    if risk_sub_type:
+                        memory_labels["risk_sub_type"] = risk_sub_type
+                    return await asyncio.wait_for(
+                        orchestrator.send_prompts_async(
+                            prompt_list=[processed_prompt],
+                            memory_labels=memory_labels,
+                        ),
+                        timeout=calculated_timeout,
+                    )
+
+                # Execute the retry-enabled function
+                await send_prompt_with_retry()
+                prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
+                self.logger.debug(
+                    f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds"
+                )
+
+                # Print progress to console
+                if prompt_idx < len(all_prompts) - 1:  # Don't print for the last prompt
+                    print(
+                        f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{len(all_prompts)}"
+                    )
+
+            except (asyncio.TimeoutError, tenacity.RetryError):
+                self.logger.warning(
+                    f"Prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with remaining prompts"
+                )
+                print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Prompt {prompt_idx+1}")
+                # Set task status to TIMEOUT for this specific prompt
+                batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
+                if task_statuses:
+                    task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                if red_team_info:
+                    red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                continue
+            except Exception as e:
+                log_error(
+                    self.logger,
+                    f"Error processing prompt {prompt_idx+1}",
+                    e,
+                    f"{strategy_name}/{risk_category_name}",
+                )
+                if red_team_info:
+                    red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                continue
 
         if task_statuses:
             task_statuses[task_key] = TASK_STATUS["COMPLETED"]
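
The same per-prompt context normalization recurs in the multi-turn and Crescendo hunks below. Read on its own it amounts to a small helper; the sketch below assumes only the shapes visible in this diff, and the helper names are hypothetical rather than part of the package:

    from typing import Any, Dict, List, Union

    def normalize_context(context_data: Union[str, Dict[str, Any], None]) -> Dict[str, List[Dict[str, Any]]]:
        """Coerce a legacy string context or a structured dict into {'contexts': [...]}."""
        if isinstance(context_data, str):
            # Legacy form: wrap the bare string as a single content entry; empty string yields no contexts.
            return {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
        if isinstance(context_data, dict):
            return {"contexts": context_data.get("contexts", [])}
        return {"contexts": []}

    def has_agent_fields(contexts: List[Dict[str, Any]]) -> bool:
        """True when any context entry carries agent-specific metadata (context_type or tool_name)."""
        return any(isinstance(ctx, dict) and ("context_type" in ctx or "tool_name" in ctx) for ctx in contexts)

In the diff itself this logic is inlined in each orchestrator loop rather than factored out.
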
@@ -312,7 +408,7 @@ class OrchestratorManager:
         timeout: int = 120,
         red_team_info: Dict = None,
         task_statuses: Dict = None,
-        prompt_to_context: Dict[str, str] = None,
+        prompt_to_context: Dict[str, Union[str, Dict]] = None,
     ) -> Orchestrator:
         """Send prompts via the RedTeamingOrchestrator (multi-turn orchestrator).
 
@@ -381,7 +477,83 @@ class OrchestratorManager:
         for prompt_idx, prompt in enumerate(all_prompts):
             prompt_start_time = datetime.now()
             self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
-            context = prompt_to_context.get(prompt, None) if prompt_to_context else None
+
+            # Get context for this prompt
+            context_data = prompt_to_context.get(prompt, {}) if prompt_to_context else {}
+
+            # Normalize context_data: handle both string (legacy) and dict formats
+            # If context_data is a string, convert it to the expected dict format
+            if isinstance(context_data, str):
+                context_data = {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
+
+            # context_data is now always a dict with a 'contexts' list
+            # Each item in contexts is a dict with 'content' key
+            # context_type and tool_name can be present per-context
+            contexts = context_data.get("contexts", [])
+
+            # Check if any context has agent-specific fields (context_type, tool_name)
+            has_agent_fields = any(
+                isinstance(ctx, dict) and ("context_type" in ctx or "tool_name" in ctx) for ctx in contexts
+            )
+
+            # Build context_dict to pass via memory labels
+            context_dict = {"contexts": contexts}
+
+            # Get risk_sub_type for this prompt if it exists
+            risk_sub_type = (
+                self.red_team.prompt_to_risk_subtype.get(prompt)
+                if self.red_team and hasattr(self.red_team, "prompt_to_risk_subtype")
+                else None
+            )
+
+            # For backwards compatibility with scoring, extract string context
+            # This is used by AzureRAIServiceTrueFalseScorer which expects a string
+            context_string = ""
+            if contexts:
+                context_string = "\n".join(
+                    ctx.get("content", "") if isinstance(ctx, dict) else str(ctx) for ctx in contexts
+                )
+
+            # Initialize processed_prompt with the original prompt as default
+            processed_prompt = prompt
+
+            # Determine how to handle the prompt based on target type and context fields
+            if isinstance(chat_target, _CallbackChatTarget):
+                # CallbackChatTarget: Always pass contexts via context_dict, embed in prompt content
+                if contexts and not has_agent_fields:
+                    # For contexts without agent fields, the prompt already has context embedded
+                    # (done in _extract_objective_content), so just use it as-is
+                    processed_prompt = prompt
+                    self.logger.debug(
+                        f"CallbackChatTarget: Prompt has embedded context, passing {len(contexts)} context source(s) in context_dict"
+                    )
+                else:
+                    # Agent fields present - prompt is clean, contexts have structure
+                    processed_prompt = prompt
+                    tool_names = [
+                        ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                    ]
+                    self.logger.debug(
+                        f"CallbackChatTarget: Passing {len(contexts)} structured context(s) with agent fields, tool_names={tool_names}"
+                    )
+            else:
+                # Non-CallbackChatTarget: Embed contexts in the actual PyRIT message
+                if has_agent_fields:
+                    # Agent target with structured context - don't embed in prompt
+                    processed_prompt = prompt
+                    tool_names = [
+                        ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                    ]
+                    self.logger.debug(
+                        f"Non-CallbackChatTarget with agent fields: {len(contexts)} context source(s), tool_names={tool_names}"
+                    )
+                elif contexts:
+                    # Model target without agent fields - embed context in prompt
+                    # Note: The prompt already has context embedded from _extract_objective_content
+                    # But for non-CallbackChatTarget, we may need additional wrapping
+                    processed_prompt = prompt
+                    self.logger.debug(f"Non-CallbackChatTarget: Using prompt with embedded context")
+
             try:
                 azure_rai_service_scorer = AzureRAIServiceTrueFalseScorer(
                     client=self.generated_rai_client,
@@ -391,7 +563,7 @@ class OrchestratorManager:
                     credential=self.credential,
                     risk_category=risk_category,
                     azure_ai_project=self.azure_ai_project,
-                    context=context,
+                    context=context_string,
                 )
 
                 azure_rai_service_target = AzureRAIServiceTarget(
@@ -419,10 +591,17 @@ class OrchestratorManager:
                     self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
                 )
                 async def send_prompt_with_retry():
+                    memory_labels = {
+                        "risk_strategy_path": output_path,
+                        "batch": prompt_idx + 1,
+                        "context": context_dict,
+                    }
+                    if risk_sub_type:
+                        memory_labels["risk_sub_type"] = risk_sub_type
                     return await asyncio.wait_for(
                         orchestrator.run_attack_async(
                             objective=prompt,
-                            memory_labels={"risk_strategy_path": output_path, "batch": 1, "context": context},
+                            memory_labels=memory_labels,
                         ),
                         timeout=calculated_timeout,
                     )
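
As in the single-turn path, the multi-turn orchestrator now forwards structured context and an optional risk sub-type through PyRIT memory labels rather than a bare context string. A sketch of the label construction under the same assumptions (hypothetical helper name, only the keys shown in this diff):

    from typing import Any, Dict, List, Optional

    def build_memory_labels(
        output_path: str,
        prompt_idx: int,
        contexts: List[Dict[str, Any]],
        risk_sub_type: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Per-prompt labels mirroring the keys attached in this change."""
        labels: Dict[str, Any] = {
            "risk_strategy_path": output_path,  # data file for this strategy/risk pair
            "batch": prompt_idx + 1,            # 1-based prompt index replaces the old fixed batch of 1
            "context": {"contexts": contexts},  # structured context travels with the conversation
        }
        if risk_sub_type:
            labels["risk_sub_type"] = risk_sub_type  # only attached when a sub-type is mapped for the prompt
        return labels
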
@@ -495,7 +674,7 @@ class OrchestratorManager:
         timeout: int = 120,
         red_team_info: Dict = None,
         task_statuses: Dict = None,
-        prompt_to_context: Dict[str, str] = None,
+        prompt_to_context: Dict[str, Union[str, Dict]] = None,
     ) -> Orchestrator:
         """Send prompts via the CrescendoOrchestrator with optimized performance.
 
@@ -546,14 +725,90 @@ class OrchestratorManager:
         for prompt_idx, prompt in enumerate(all_prompts):
             prompt_start_time = datetime.now()
             self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
-            context = prompt_to_context.get(prompt, None) if prompt_to_context else None
+
+            # Get context for this prompt
+            context_data = prompt_to_context.get(prompt, {}) if prompt_to_context else {}
+
+            # Normalize context_data: handle both string (legacy) and dict formats
+            # If context_data is a string, convert it to the expected dict format
+            if isinstance(context_data, str):
+                context_data = {"contexts": [{"content": context_data}]} if context_data else {"contexts": []}
+
+            # context_data is now always a dict with a 'contexts' list
+            # Each item in contexts is a dict with 'content' key
+            # context_type and tool_name can be present per-context
+            contexts = context_data.get("contexts", [])
+
+            # Check if any context has agent-specific fields (context_type, tool_name)
+            has_agent_fields = any(
+                isinstance(ctx, dict) and ("context_type" in ctx or "tool_name" in ctx) for ctx in contexts
+            )
+
+            # Build context_dict to pass via memory labels
+            context_dict = {"contexts": contexts}
+
+            # Get risk_sub_type for this prompt if it exists
+            risk_sub_type = (
+                self.red_team.prompt_to_risk_subtype.get(prompt)
+                if self.red_team and hasattr(self.red_team, "prompt_to_risk_subtype")
+                else None
+            )
+
+            # For backwards compatibility with scoring, extract string context
+            # This is used by AzureRAIServiceTrueFalseScorer and RAIServiceEvalChatTarget which expect a string
+            context_string = ""
+            if contexts:
+                context_string = "\n".join(
+                    ctx.get("content", "") if isinstance(ctx, dict) else str(ctx) for ctx in contexts
+                )
+
+            # Initialize processed_prompt with the original prompt as default
+            processed_prompt = prompt
+
+            # Determine how to handle the prompt based on target type and context fields
+            if isinstance(chat_target, _CallbackChatTarget):
+                # CallbackChatTarget: Always pass contexts via context_dict, embed in prompt content
+                if contexts and not has_agent_fields:
+                    # For contexts without agent fields, the prompt already has context embedded
+                    # (done in _extract_objective_content), so just use it as-is
+                    processed_prompt = prompt
+                    self.logger.debug(
+                        f"CallbackChatTarget: Prompt has embedded context, passing {len(contexts)} context source(s) in context_dict"
+                    )
+                else:
+                    # Agent fields present - prompt is clean, contexts have structure
+                    processed_prompt = prompt
+                    tool_names = [
+                        ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                    ]
+                    self.logger.debug(
+                        f"CallbackChatTarget: Passing {len(contexts)} structured context(s) with agent fields, tool_names={tool_names}"
+                    )
+            else:
+                # Non-CallbackChatTarget: Embed contexts in the actual PyRIT message
+                if has_agent_fields:
+                    # Agent target with structured context - don't embed in prompt
+                    processed_prompt = prompt
+                    tool_names = [
+                        ctx.get("tool_name") for ctx in contexts if isinstance(ctx, dict) and "tool_name" in ctx
+                    ]
+                    self.logger.debug(
+                        f"Non-CallbackChatTarget with agent fields: {len(contexts)} context source(s), tool_names={tool_names}"
+                    )
+                elif contexts:
+                    # Model target without agent fields - embed context in prompt
+                    # Note: The prompt already has context embedded from _extract_objective_content
+                    # But for non-CallbackChatTarget, we may need additional wrapping
+                    processed_prompt = prompt
+                    self.logger.debug(f"Non-CallbackChatTarget: Using prompt with embedded context")
+
             try:
                 red_llm_scoring_target = RAIServiceEvalChatTarget(
                     logger=self.logger,
                     credential=self.credential,
                     risk_category=risk_category,
                     azure_ai_project=self.azure_ai_project,
-                    context=context,
+                    context=context_string,
                 )
 
                 azure_rai_service_target = AzureRAIServiceTarget(
@@ -583,7 +838,7 @@ class OrchestratorManager:
                     credential=self.credential,
                     risk_category=risk_category,
                     azure_ai_project=self.azure_ai_project,
-                    context=context,
+                    context=context_string,
                 )
 
                 try:
@@ -592,14 +847,17 @@ class OrchestratorManager:
                         self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
                     )
                     async def send_prompt_with_retry():
+                        memory_labels = {
+                            "risk_strategy_path": output_path,
+                            "batch": prompt_idx + 1,
+                            "context": context_dict,
+                        }
+                        if risk_sub_type:
+                            memory_labels["risk_sub_type"] = risk_sub_type
                         return await asyncio.wait_for(
                             orchestrator.run_attack_async(
                                 objective=prompt,
-                                memory_labels={
-                                    "risk_strategy_path": output_path,
-                                    "batch": prompt_idx + 1,
-                                    "context": context,
-                                },
+                                memory_labels=memory_labels,
                             ),
                             timeout=calculated_timeout,
                         )
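
Both the multi-turn and Crescendo paths also flatten the structured contexts back into a single `context_string` for the RAI scorers, which still expect plain text. A minimal sketch of that flattening with a hypothetical function name:

    from typing import Any, Dict, List, Union

    def flatten_contexts(contexts: List[Union[str, Dict[str, Any]]]) -> str:
        """Join context contents into the newline-separated string the scorers consume."""
        if not contexts:
            return ""
        return "\n".join(ctx.get("content", "") if isinstance(ctx, dict) else str(ctx) for ctx in contexts)

    # Example: two structured entries collapse to "doc A\ndoc B".
    print(flatten_contexts([{"content": "doc A"}, {"content": "doc B"}]))
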