azure-ai-evaluation 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (85)
  1. azure/ai/evaluation/__init__.py +46 -12
  2. azure/ai/evaluation/_aoai/python_grader.py +84 -0
  3. azure/ai/evaluation/_aoai/score_model_grader.py +1 -0
  4. azure/ai/evaluation/_common/onedp/models/_models.py +5 -0
  5. azure/ai/evaluation/_common/rai_service.py +3 -3
  6. azure/ai/evaluation/_common/utils.py +74 -17
  7. azure/ai/evaluation/_converters/_ai_services.py +60 -10
  8. azure/ai/evaluation/_converters/_models.py +75 -26
  9. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +70 -22
  10. azure/ai/evaluation/_evaluate/_eval_run.py +14 -1
  11. azure/ai/evaluation/_evaluate/_evaluate.py +163 -44
  12. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +79 -33
  13. azure/ai/evaluation/_evaluate/_utils.py +5 -2
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  15. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +8 -1
  16. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
  17. azure/ai/evaluation/_evaluators/_common/_base_eval.py +143 -25
  18. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +7 -2
  19. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +19 -9
  20. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -5
  21. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -1
  22. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -1
  23. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +5 -2
  24. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -1
  25. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +3 -0
  26. azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -0
  27. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +1 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
  29. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  30. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +114 -4
  31. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +9 -3
  32. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -1
  33. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +8 -1
  34. azure/ai/evaluation/_evaluators/_qa/_qa.py +1 -1
  35. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +56 -3
  36. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
  37. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +11 -3
  38. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +3 -2
  39. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
  40. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +2 -1
  41. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -2
  42. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +24 -12
  43. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
  44. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +214 -187
  45. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +126 -31
  46. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +8 -1
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -1
  48. azure/ai/evaluation/_exceptions.py +1 -0
  49. azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
  50. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +115 -30
  51. azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
  52. azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
  53. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +28 -31
  54. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +2 -0
  55. azure/ai/evaluation/_version.py +1 -1
  56. azure/ai/evaluation/red_team/__init__.py +4 -3
  57. azure/ai/evaluation/red_team/_attack_objective_generator.py +17 -0
  58. azure/ai/evaluation/red_team/_callback_chat_target.py +14 -1
  59. azure/ai/evaluation/red_team/_evaluation_processor.py +376 -0
  60. azure/ai/evaluation/red_team/_mlflow_integration.py +322 -0
  61. azure/ai/evaluation/red_team/_orchestrator_manager.py +661 -0
  62. azure/ai/evaluation/red_team/_red_team.py +655 -2665
  63. azure/ai/evaluation/red_team/_red_team_result.py +6 -0
  64. azure/ai/evaluation/red_team/_result_processor.py +610 -0
  65. azure/ai/evaluation/red_team/_utils/__init__.py +34 -0
  66. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +11 -4
  67. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +6 -0
  68. azure/ai/evaluation/red_team/_utils/constants.py +0 -2
  69. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  70. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  71. azure/ai/evaluation/red_team/_utils/formatting_utils.py +115 -13
  72. azure/ai/evaluation/red_team/_utils/metric_mapping.py +24 -4
  73. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  74. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  75. azure/ai/evaluation/red_team/_utils/strategy_utils.py +17 -4
  76. azure/ai/evaluation/simulator/_adversarial_simulator.py +14 -2
  77. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +13 -1
  78. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +21 -7
  79. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +24 -5
  80. azure/ai/evaluation/simulator/_simulator.py +12 -0
  81. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/METADATA +63 -4
  82. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/RECORD +85 -76
  83. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/WHEEL +1 -1
  84. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info/licenses}/NOTICE.txt +0 -0
  85. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,661 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ """
5
+ Orchestrator management module for Red Team Agent.
6
+
7
+ This module handles PyRIT orchestrator initialization, execution, and management
8
+ for different attack strategies including single-turn, multi-turn, and crescendo attacks.
9
+ """
10
+
11
+ import asyncio
12
+ import math
13
+ import os
14
+ import uuid
15
+ from datetime import datetime
16
+ from typing import Dict, List, Optional, Union, Callable
17
+ from tqdm import tqdm
18
+
19
+ # PyRIT imports
20
+ from pyrit.orchestrator.single_turn.prompt_sending_orchestrator import PromptSendingOrchestrator
21
+ from pyrit.orchestrator.multi_turn.red_teaming_orchestrator import RedTeamingOrchestrator
22
+ from pyrit.orchestrator.multi_turn.crescendo_orchestrator import CrescendoOrchestrator
23
+ from pyrit.orchestrator import Orchestrator
24
+ from pyrit.prompt_converter import PromptConverter
25
+ from pyrit.prompt_target import PromptChatTarget
26
+
27
+ # Retry imports
28
+ import httpx
29
+ import httpcore
30
+ import tenacity
31
+ from tenacity import retry
32
+
33
+ # Local imports
34
+ from ._attack_strategy import AttackStrategy
35
+ from ._attack_objective_generator import RiskCategory
36
+ from ._utils._rai_service_target import AzureRAIServiceTarget
37
+ from ._utils._rai_service_true_false_scorer import AzureRAIServiceTrueFalseScorer
38
+ from ._utils._rai_service_eval_chat_target import RAIServiceEvalChatTarget
39
+ from ._utils.constants import DATA_EXT, TASK_STATUS
40
+ from ._utils.logging_utils import log_strategy_start, log_error
41
+ from ._utils.formatting_utils import write_pyrit_outputs_to_file
42
+
43
+
def network_retry_decorator(retry_config, logger, strategy_name, risk_category_name, prompt_idx=None):
    """Create a reusable retry decorator for network operations.

    Wraps an async callable so that transient network failures (httpx/httpcore
    timeouts, connection errors, HTTP errors, OS-level socket errors) are
    logged, briefly backed off, and re-raised so the tenacity ``retry`` policy
    configured under ``retry_config["network_retry"]`` can re-invoke the call.

    :param retry_config: Retry configuration dictionary; the value under the
        ``"network_retry"`` key is expanded as keyword arguments to
        :func:`tenacity.retry`
    :param logger: Logger instance for logging warnings
    :param strategy_name: Name of the attack strategy (used in log messages)
    :param risk_category_name: Name of the risk category (used in log messages)
    :param prompt_idx: Optional prompt index for detailed logging
    :return: Configured retry decorator
    """
    import functools  # function-scope import keeps the module import block unchanged

    def decorator(func):
        @retry(**retry_config["network_retry"])
        @functools.wraps(func)  # preserve the wrapped coroutine's name/docstring
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except (
                httpx.ConnectTimeout,
                httpx.ReadTimeout,
                httpx.ConnectError,
                httpx.HTTPError,
                ConnectionError,
                TimeoutError,
                OSError,
                asyncio.TimeoutError,
                httpcore.ReadTimeout,
                httpx.HTTPStatusError,
            ) as e:
                prompt_detail = f" for prompt {prompt_idx}" if prompt_idx is not None else ""
                logger.warning(
                    f"Network error{prompt_detail} for {strategy_name}/{risk_category_name}: {type(e).__name__}: {str(e)}"
                )
                # Short fixed backoff before re-raising so tenacity's retries
                # don't immediately hammer a struggling endpoint.
                await asyncio.sleep(2)
                raise

        return wrapper

    return decorator
82
+
83
+
class OrchestratorManager:
    """Manages PyRIT orchestrators for different attack strategies.

    Provides three execution paths — single-turn (``PromptSendingOrchestrator``),
    multi-turn (``RedTeamingOrchestrator``), and crescendo
    (``CrescendoOrchestrator``) — sharing timeout scaling, converter
    normalization, retry handling, and output-file bookkeeping.
    """

    # Timeout multipliers per orchestrator type. Multi-turn and crescendo
    # orchestrators issue many API calls per objective (and crescendo also
    # backtracks), so they get proportionally larger budgets.
    _TIMEOUT_MULTIPLIERS = {
        "single": 1.0,
        "multi_turn": 3.0,
        "crescendo": 4.0,
    }

    def __init__(
        self,
        logger,
        generated_rai_client,
        credential,
        azure_ai_project,
        one_dp_project,
        retry_config,
        scan_output_dir=None,
    ):
        """Initialize the orchestrator manager.

        :param logger: Logger instance for logging
        :param generated_rai_client: RAI client for service interactions
        :param credential: Authentication credential
        :param azure_ai_project: Azure AI project configuration
        :param one_dp_project: Whether this is a OneDP project
        :param retry_config: Retry configuration for network errors
        :param scan_output_dir: Directory for scan outputs
        """
        self.logger = logger
        self.generated_rai_client = generated_rai_client
        self.credential = credential
        self.azure_ai_project = azure_ai_project
        self._one_dp_project = one_dp_project
        self.retry_config = retry_config
        self.scan_output_dir = scan_output_dir

    def _calculate_timeout(self, base_timeout: int, orchestrator_type: str) -> int:
        """Calculate an appropriate timeout based on orchestrator type.

        :param base_timeout: Base timeout value in seconds
        :param orchestrator_type: Type of orchestrator ('single', 'multi_turn', 'crescendo')
        :return: Calculated timeout in seconds
        """
        multiplier = self._TIMEOUT_MULTIPLIERS.get(orchestrator_type, 1.0)
        calculated_timeout = int(base_timeout * multiplier)
        self.logger.debug(
            f"Calculated timeout for {orchestrator_type} orchestrator: {calculated_timeout}s "
            f"(base: {base_timeout}s, multiplier: {multiplier}x)"
        )
        return calculated_timeout

    def get_orchestrator_for_attack_strategy(
        self, attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
    ) -> Callable:
        """Get the orchestrator coroutine function for the given attack strategy.

        :param attack_strategy: Attack strategy to get orchestrator for
        :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
        :return: Callable orchestrator function
        :rtype: Callable
        :raises ValueError: If MultiTurn or Crescendo appears in a composed (list) strategy
        """
        if isinstance(attack_strategy, list):
            if AttackStrategy.MultiTurn in attack_strategy or AttackStrategy.Crescendo in attack_strategy:
                self.logger.error("MultiTurn and Crescendo strategies are not supported in composed attacks.")
                raise ValueError("MultiTurn and Crescendo strategies are not supported in composed attacks.")
        elif AttackStrategy.MultiTurn == attack_strategy:
            return self._multi_turn_orchestrator
        elif AttackStrategy.Crescendo == attack_strategy:
            return self._crescendo_orchestrator
        return self._prompt_sending_orchestrator

    def _build_converter_list(
        self, converter: Union[PromptConverter, List[PromptConverter], None]
    ) -> List[PromptConverter]:
        """Normalize a converter argument into a list, dropping ``None`` entries.

        Previously the single-turn path passed lists containing ``None`` straight
        through to PyRIT while the multi-turn path filtered them; this helper
        makes all paths consistent.
        """
        if converter and isinstance(converter, PromptConverter):
            return [converter]
        if converter and isinstance(converter, list):
            return [c for c in converter if c is not None]
        return []

    def _log_converters(self, converter_list: List[PromptConverter]) -> None:
        """Log which converters will be applied (debug level)."""
        if converter_list:
            converter_names = [c.__class__.__name__ for c in converter_list]
            self.logger.debug(f"Using converters: {', '.join(converter_names)}")
        else:
            self.logger.debug("No converters specified")

    def _init_output_path(self) -> str:
        """Build a unique output path for memory labelling.

        Ensures the scan output directory exists (previously only the
        multi-turn path did this, so single-turn/crescendo could fail when
        writing outputs to a not-yet-created directory).
        """
        base_path = str(uuid.uuid4())
        if self.scan_output_dir:
            os.makedirs(self.scan_output_dir, exist_ok=True)
            return os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
        return f"{base_path}{DATA_EXT}"

    async def _execute_objective(
        self,
        orchestrator,
        prompt: str,
        prompt_idx: int,
        total_prompts: int,
        *,
        memory_labels: Dict,
        calculated_timeout: int,
        strategy_name: str,
        risk_category_name: str,
        output_path: str,
        red_team_info: Optional[Dict],
        task_statuses: Optional[Dict],
        prompt_to_context: Optional[Dict[str, str]],
    ) -> None:
        """Run one attack objective with retry/timeout handling and bookkeeping.

        Shared by the multi-turn and crescendo paths. Timeouts and per-prompt
        errors are logged and recorded (statuses set to TIMEOUT/INCOMPLETE) but
        NOT re-raised, so the caller's loop continues with the next prompt —
        mirroring the original `continue` semantics.
        """
        prompt_start_time = datetime.now()
        try:
            # Create retry-enabled function using the reusable decorator
            @network_retry_decorator(
                self.retry_config, self.logger, strategy_name, risk_category_name, prompt_idx + 1
            )
            async def send_prompt_with_retry():
                return await asyncio.wait_for(
                    orchestrator.run_attack_async(objective=prompt, memory_labels=memory_labels),
                    timeout=calculated_timeout,
                )

            await send_prompt_with_retry()
            prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
            self.logger.debug(
                f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds"
            )

            # Write outputs to file after each prompt is processed
            write_pyrit_outputs_to_file(
                output_path=output_path,
                logger=self.logger,
                prompt_to_context=prompt_to_context,
            )

            # Print progress to console (skip for the last prompt)
            if prompt_idx < total_prompts - 1:
                print(
                    f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{total_prompts}"
                )
        except (asyncio.TimeoutError, tenacity.RetryError):
            self.logger.warning(
                f"Batch {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with partial results"
            )
            print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1}")
            # Record a per-prompt TIMEOUT status and mark the run incomplete
            batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
            if task_statuses:
                task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
            if red_team_info:
                red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
        except Exception as e:
            log_error(
                self.logger,
                f"Error processing prompt {prompt_idx+1}",
                e,
                f"{strategy_name}/{risk_category_name}",
            )
            if red_team_info:
                red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]

    async def _prompt_sending_orchestrator(
        self,
        chat_target: PromptChatTarget,
        all_prompts: List[str],
        converter: Union[PromptConverter, List[PromptConverter]],
        *,
        strategy_name: str = "unknown",
        risk_category_name: str = "unknown",
        risk_category: Optional[RiskCategory] = None,
        timeout: int = 120,
        red_team_info: Optional[Dict] = None,
        task_statuses: Optional[Dict] = None,
        prompt_to_context: Optional[Dict[str, str]] = None,
    ) -> Orchestrator:
        """Send prompts via the PromptSendingOrchestrator (single-turn).

        :param chat_target: The target to send prompts to
        :param all_prompts: List of prompts to process and send
        :param converter: Prompt converter or list of converters to transform prompts
        :param strategy_name: Name of the attack strategy being used
        :param risk_category_name: Name of the risk category being evaluated
        :param risk_category: Risk category being evaluated
        :param timeout: Base timeout in seconds for each prompt
        :param red_team_info: Dictionary to store file paths and results
        :param task_statuses: Dictionary to track task statuses
        :param prompt_to_context: Optional mapping from prompt text to its context
        :return: Configured and initialized orchestrator
        :rtype: Orchestrator
        :raises Exception: Re-raised if orchestrator initialization fails
        """
        task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
        if task_statuses:
            task_statuses[task_key] = TASK_STATUS["RUNNING"]

        log_strategy_start(self.logger, strategy_name, risk_category_name)

        converter_list = self._build_converter_list(converter)
        self._log_converters(converter_list)

        try:
            orchestrator = PromptSendingOrchestrator(objective_target=chat_target, prompt_converters=converter_list)

            # Nothing to do — succeed immediately with an idle orchestrator.
            if not all_prompts:
                self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{risk_category_name}")
                if task_statuses:
                    task_statuses[task_key] = TASK_STATUS["COMPLETED"]
                return orchestrator

            output_path = self._init_output_path()
            if red_team_info:
                red_team_info[strategy_name][risk_category_name]["data_file"] = output_path

            # Process all prompts at once
            self.logger.debug(f"Processing {len(all_prompts)} prompts for {strategy_name}/{risk_category_name}")
            start_time = datetime.now()

            calculated_timeout = self._calculate_timeout(timeout, "single")

            try:
                # Create retry-enabled function using the reusable decorator
                @network_retry_decorator(self.retry_config, self.logger, strategy_name, risk_category_name)
                async def send_all_with_retry():
                    return await asyncio.wait_for(
                        orchestrator.send_prompts_async(
                            prompt_list=all_prompts,
                            memory_labels={
                                "risk_strategy_path": output_path,
                                "batch": 1,
                            },
                        ),
                        timeout=calculated_timeout,
                    )

                await send_all_with_retry()
                duration = (datetime.now() - start_time).total_seconds()
                self.logger.debug(
                    f"Successfully processed all prompts for {strategy_name}/{risk_category_name} in {duration:.2f} seconds"
                )
            except (asyncio.TimeoutError, tenacity.RetryError):
                self.logger.warning(
                    f"Prompt processing for {strategy_name}/{risk_category_name} timed out after {calculated_timeout} seconds, continuing with partial results"
                )
                print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}")
                if task_statuses:
                    task_statuses[task_key] = TASK_STATUS["TIMEOUT"]
                if red_team_info:
                    red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
            except Exception as e:
                log_error(
                    self.logger,
                    "Error processing prompts",
                    e,
                    f"{strategy_name}/{risk_category_name}",
                )
                if red_team_info:
                    red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]

            if task_statuses:
                task_statuses[task_key] = TASK_STATUS["COMPLETED"]
            return orchestrator

        except Exception as e:
            log_error(
                self.logger,
                "Failed to initialize orchestrator",
                e,
                f"{strategy_name}/{risk_category_name}",
            )
            if task_statuses:
                task_statuses[task_key] = TASK_STATUS["FAILED"]
            raise

    async def _multi_turn_orchestrator(
        self,
        chat_target: PromptChatTarget,
        all_prompts: List[str],
        converter: Union[PromptConverter, List[PromptConverter]],
        *,
        strategy_name: str = "unknown",
        risk_category_name: str = "unknown",
        risk_category: Optional[RiskCategory] = None,
        timeout: int = 120,
        red_team_info: Optional[Dict] = None,
        task_statuses: Optional[Dict] = None,
        prompt_to_context: Optional[Dict[str, str]] = None,
    ) -> Optional[Orchestrator]:
        """Send prompts via the RedTeamingOrchestrator (multi-turn).

        :param chat_target: The target to send prompts to
        :param all_prompts: List of prompts to process and send
        :param converter: Prompt converter or list of converters to transform prompts
        :param strategy_name: Name of the attack strategy being used
        :param risk_category_name: Name of the risk category being evaluated
        :param risk_category: Risk category being evaluated
        :param timeout: Base timeout in seconds for each prompt
        :param red_team_info: Dictionary to store file paths and results
        :param task_statuses: Dictionary to track task statuses
        :param prompt_to_context: Optional mapping from prompt text to its context
        :return: The last orchestrator created, or ``None`` if no prompts were given
            (previously an empty prompt list raised ``UnboundLocalError``)
        :raises Exception: Re-raised if orchestrator initialization fails
        """
        max_turns = 5  # Default max turns for the multi-turn conversation
        task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
        if task_statuses:
            task_statuses[task_key] = TASK_STATUS["RUNNING"]

        log_strategy_start(self.logger, strategy_name, risk_category_name)

        converter_list = self._build_converter_list(converter)
        self._log_converters(converter_list)

        output_path = self._init_output_path()
        if red_team_info:
            red_team_info[strategy_name][risk_category_name]["data_file"] = output_path

        calculated_timeout = self._calculate_timeout(timeout, "multi_turn")

        # BUGFIX: guard the empty-prompt case; the original fell through to an
        # unbound `orchestrator` variable and raised UnboundLocalError.
        orchestrator: Optional[Orchestrator] = None
        if not all_prompts:
            self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{risk_category_name}")

        for prompt_idx, prompt in enumerate(all_prompts):
            self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
            context = prompt_to_context.get(prompt, None) if prompt_to_context else None
            try:
                azure_rai_service_scorer = AzureRAIServiceTrueFalseScorer(
                    client=self.generated_rai_client,
                    api_version=None,
                    model="gpt-4",
                    logger=self.logger,
                    credential=self.credential,
                    risk_category=risk_category,
                    azure_ai_project=self.azure_ai_project,
                    context=context,
                )

                azure_rai_service_target = AzureRAIServiceTarget(
                    client=self.generated_rai_client,
                    api_version=None,
                    model="gpt-4",
                    prompt_template_key="orchestrators/red_teaming/text_generation.yaml",
                    objective=prompt,
                    logger=self.logger,
                    is_one_dp_project=self._one_dp_project,
                )

                orchestrator = RedTeamingOrchestrator(
                    objective_target=chat_target,
                    adversarial_chat=azure_rai_service_target,
                    max_turns=max_turns,
                    prompt_converters=converter_list,
                    objective_scorer=azure_rai_service_scorer,
                    use_score_as_feedback=False,
                )

                # Per-prompt execution; timeouts/errors are recorded inside and
                # do not abort the loop.
                await self._execute_objective(
                    orchestrator,
                    prompt,
                    prompt_idx,
                    len(all_prompts),
                    memory_labels={"risk_strategy_path": output_path, "batch": 1, "context": context},
                    calculated_timeout=calculated_timeout,
                    strategy_name=strategy_name,
                    risk_category_name=risk_category_name,
                    output_path=output_path,
                    red_team_info=red_team_info,
                    task_statuses=task_statuses,
                    prompt_to_context=prompt_to_context,
                )
            except Exception as e:
                # Only construction failures reach here; execution errors are
                # handled inside _execute_objective.
                log_error(
                    self.logger,
                    "Failed to initialize orchestrator",
                    e,
                    f"{strategy_name}/{risk_category_name}",
                )
                if task_statuses:
                    task_statuses[task_key] = TASK_STATUS["FAILED"]
                raise

        if task_statuses:
            task_statuses[task_key] = TASK_STATUS["COMPLETED"]
        return orchestrator

    async def _crescendo_orchestrator(
        self,
        chat_target: PromptChatTarget,
        all_prompts: List[str],
        converter: Union[PromptConverter, List[PromptConverter]],
        *,
        strategy_name: str = "unknown",
        risk_category_name: str = "unknown",
        risk_category: Optional[RiskCategory] = None,
        timeout: int = 120,
        red_team_info: Optional[Dict] = None,
        task_statuses: Optional[Dict] = None,
        prompt_to_context: Optional[Dict[str, str]] = None,
    ) -> Optional[Orchestrator]:
        """Send prompts via the CrescendoOrchestrator.

        :param chat_target: The target to send prompts to
        :param all_prompts: List of prompts to process and send
        :param converter: Prompt converter or list of converters to transform prompts
            (note: crescendo does not apply converters; accepted for interface parity)
        :param strategy_name: Name of the attack strategy being used
        :param risk_category_name: Name of the risk category being evaluated
        :param risk_category: Risk category being evaluated
        :param timeout: Base timeout in seconds for each prompt
        :param red_team_info: Dictionary to store file paths and results
        :param task_statuses: Dictionary to track task statuses
        :param prompt_to_context: Optional mapping from prompt text to its context
        :return: The last orchestrator created, or ``None`` if no prompts were given
            (previously an empty prompt list raised ``UnboundLocalError``)
        :raises Exception: Re-raised if orchestrator initialization fails
        """
        max_turns = 10  # Default max turns for the crescendo conversation
        max_backtracks = 5  # Default number of allowed backtracks
        task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
        if task_statuses:
            task_statuses[task_key] = TASK_STATUS["RUNNING"]

        log_strategy_start(self.logger, strategy_name, risk_category_name)

        output_path = self._init_output_path()
        if red_team_info:
            red_team_info[strategy_name][risk_category_name]["data_file"] = output_path

        calculated_timeout = self._calculate_timeout(timeout, "crescendo")

        # BUGFIX: guard the empty-prompt case; the original fell through to an
        # unbound `orchestrator` variable and raised UnboundLocalError.
        orchestrator: Optional[Orchestrator] = None
        if not all_prompts:
            self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{risk_category_name}")

        for prompt_idx, prompt in enumerate(all_prompts):
            self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
            context = prompt_to_context.get(prompt, None) if prompt_to_context else None
            try:
                red_llm_scoring_target = RAIServiceEvalChatTarget(
                    logger=self.logger,
                    credential=self.credential,
                    risk_category=risk_category,
                    azure_ai_project=self.azure_ai_project,
                    context=context,
                )

                azure_rai_service_target = AzureRAIServiceTarget(
                    client=self.generated_rai_client,
                    api_version=None,
                    model="gpt-4",
                    prompt_template_key="orchestrators/crescendo/crescendo_variant_1.yaml",
                    objective=prompt,
                    logger=self.logger,
                    crescendo_format=True,
                    is_one_dp_project=self._one_dp_project,
                )

                orchestrator = CrescendoOrchestrator(
                    objective_target=chat_target,
                    adversarial_chat=azure_rai_service_target,
                    max_turns=max_turns,
                    scoring_target=red_llm_scoring_target,
                    max_backtracks=max_backtracks,
                )

                # Override the default objective scorer with the RAI-service
                # true/false scorer (touches a private PyRIT attribute).
                orchestrator._objective_scorer = AzureRAIServiceTrueFalseScorer(
                    client=self.generated_rai_client,
                    api_version=None,
                    model="gpt-4",
                    logger=self.logger,
                    credential=self.credential,
                    risk_category=risk_category,
                    azure_ai_project=self.azure_ai_project,
                    context=context,
                )

                # Per-prompt execution; timeouts/errors are recorded inside and
                # do not abort the loop. Crescendo labels each prompt as its
                # own batch.
                await self._execute_objective(
                    orchestrator,
                    prompt,
                    prompt_idx,
                    len(all_prompts),
                    memory_labels={
                        "risk_strategy_path": output_path,
                        "batch": prompt_idx + 1,
                        "context": context,
                    },
                    calculated_timeout=calculated_timeout,
                    strategy_name=strategy_name,
                    risk_category_name=risk_category_name,
                    output_path=output_path,
                    red_team_info=red_team_info,
                    task_statuses=task_statuses,
                    prompt_to_context=prompt_to_context,
                )
            except Exception as e:
                # Only construction failures reach here; execution errors are
                # handled inside _execute_objective.
                log_error(
                    self.logger,
                    "Failed to initialize orchestrator",
                    e,
                    f"{strategy_name}/{risk_category_name}",
                )
                if task_statuses:
                    task_statuses[task_key] = TASK_STATUS["FAILED"]
                raise

        if task_statuses:
            task_statuses[task_key] = TASK_STATUS["COMPLETED"]
        return orchestrator