azure-ai-evaluation 1.6.0__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +1 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +1 -1
- azure/ai/evaluation/_aoai/label_grader.py +2 -2
- azure/ai/evaluation/_aoai/string_check_grader.py +2 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +2 -2
- azure/ai/evaluation/_common/__init__.py +3 -1
- azure/ai/evaluation/_common/evaluation_onedp_client.py +50 -5
- azure/ai/evaluation/_common/onedp/operations/_operations.py +4 -2
- azure/ai/evaluation/_common/rai_service.py +7 -6
- azure/ai/evaluation/_converters/_ai_services.py +162 -118
- azure/ai/evaluation/_converters/_models.py +76 -6
- azure/ai/evaluation/_eval_mapping.py +2 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +15 -17
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +24 -5
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +4 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +31 -29
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +10 -0
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +10 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +10 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +13 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +14 -4
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +10 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +80 -10
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +26 -7
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +264 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +503 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +69 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +237 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +2 -0
- azure/ai/evaluation/red_team/_red_team.py +572 -207
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +121 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +570 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +108 -0
- azure/ai/evaluation/red_team/_utils/constants.py +5 -1
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +2 -2
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +9 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +3 -3
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +3 -3
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +3 -0
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +15 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +6 -5
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/METADATA +35 -3
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/RECORD +69 -61
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/top_level.txt +0 -0
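Most of the churn in this release is in azure/ai/evaluation/red_team/_red_team.py, reconstructed below: it wires PyRIT's RedTeamingOrchestrator (multi-turn) and CrescendoOrchestrator into the RedTeam class, adds an upload path for OneDP (Foundry) projects, and threads a risk_category_name through the orchestrator plumbing. For orientation only, a hypothetical caller of the new strategies might look like the sketch below; only RedTeam, AttackStrategy, RiskCategory and the MultiTurn/Crescendo strategy names come from this diff, and every keyword argument in the sketch is an assumption about the public API rather than something the diff confirms.

# Hypothetical usage sketch (not taken from this diff); keyword arguments are assumptions.
import asyncio
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.red_team import RedTeam, AttackStrategy, RiskCategory

async def main():
    red_team = RedTeam(
        azure_ai_project="<azure-ai-project>",          # assumed: project reference accepted here
        credential=DefaultAzureCredential(),
        risk_categories=[RiskCategory.Violence],        # assumed keyword
    )
    # The diff maps AttackStrategy.MultiTurn -> RedTeamingOrchestrator and
    # AttackStrategy.Crescendo -> CrescendoOrchestrator; composing either with
    # other strategies in a list raises ValueError.
    await red_team.scan(
        target=lambda query: "response",                # assumed: simple callback target
        attack_strategies=[AttackStrategy.Crescendo],   # assumed keyword
    )

asyncio.run(main())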
--- azure/ai/evaluation/red_team/_red_team.py (1.6.0)
+++ azure/ai/evaluation/red_team/_red_team.py (1.8.0)
@@ -39,6 +39,7 @@ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._common.math import list_mean_nan_safe, is_none_or_nan
 from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
 from azure.ai.evaluation import evaluate
+from azure.ai.evaluation._common import RedTeamUpload, ResultType
 
 # Azure Core imports
 from azure.core.credentials import TokenCredential
@@ -47,6 +48,9 @@ from azure.core.credentials import TokenCredential
 from ._red_team_result import RedTeamResult, RedTeamingScorecard, RedTeamingParameters, ScanResult
 from ._attack_strategy import AttackStrategy
 from ._attack_objective_generator import RiskCategory, _AttackObjectiveGenerator
+from ._utils._rai_service_target import AzureRAIServiceTarget
+from ._utils._rai_service_true_false_scorer import AzureRAIServiceTrueFalseScorer
+from ._utils._rai_service_eval_chat_target import RAIServiceEvalChatTarget
 
 # PyRIT imports
 from pyrit.common import initialize_pyrit, DUCK_DB
@@ -54,9 +58,11 @@ from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget
 from pyrit.models import ChatMessage
 from pyrit.memory import CentralMemory
 from pyrit.orchestrator.single_turn.prompt_sending_orchestrator import PromptSendingOrchestrator
+from pyrit.orchestrator.multi_turn.red_teaming_orchestrator import RedTeamingOrchestrator
 from pyrit.orchestrator import Orchestrator
 from pyrit.exceptions import PyritException
 from pyrit.prompt_converter import PromptConverter, MathPromptConverter, Base64Converter, FlipConverter, MorseConverter, AnsiAttackConverter, AsciiArtConverter, AsciiSmugglerConverter, AtbashConverter, BinaryConverter, CaesarConverter, CharacterSpaceConverter, CharSwapGenerator, DiacriticConverter, LeetspeakConverter, UrlConverter, UnicodeSubstitutionConverter, UnicodeConfusableConverter, SuffixAppendConverter, StringJoinConverter, ROT13Converter
+from pyrit.orchestrator.multi_turn.crescendo_orchestrator import CrescendoOrchestrator
 
 # Retry imports
 import httpx
@@ -77,7 +83,7 @@ from ._utils.logging_utils import (
 )
 
 @experimental
-class RedTeam():
+class RedTeam:
     """
     This class uses various attack strategies to test the robustness of AI models against adversarial inputs.
     It logs the results of these evaluations and provides detailed scorecards summarizing the attack success rates.
@@ -215,11 +221,12 @@ class RedTeam():
         self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
         self.credential = credential
         self.output_dir = output_dir
-
+        self._one_dp_project = is_onedp_project(azure_ai_project)
+
         # Initialize logger without output directory (will be updated during scan)
         self.logger = setup_logger()
 
-        if not
+        if not self._one_dp_project:
             self.token_manager = ManagedIdentityAPITokenManager(
                 token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
                 logger=logging.getLogger("RedTeamLogger"),
@@ -241,7 +248,7 @@ class RedTeam():
         self.scan_id = None
         self.scan_output_dir = None
 
-        self.generated_rai_client = GeneratedRAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager.
+        self.generated_rai_client = GeneratedRAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager.credential) #type: ignore
 
         # Initialize a cache for attack objectives by risk category and strategy
         self.attack_objectives = {}
@@ -276,52 +283,67 @@ class RedTeam():
         :raises EvaluationException: If no azure_ai_project is provided or trace destination cannot be determined
         """
         if not azure_ai_project:
-            log_error(self.logger, "No azure_ai_project provided, cannot
+            log_error(self.logger, "No azure_ai_project provided, cannot upload run")
             raise EvaluationException(
                 message="No azure_ai_project provided",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.MISSING_FIELD,
                 target=ErrorTarget.RED_TEAM
             )
-
-
-
-
-
-
-                blame=ErrorBlame.SYSTEM_ERROR,
-                category=ErrorCategory.UNKNOWN,
-                target=ErrorTarget.RED_TEAM
+
+        if self._one_dp_project:
+            response = self.generated_rai_client._evaluation_onedp_client.start_red_team_run(
+                red_team=RedTeamUpload(
+                    scan_name=run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
+                )
             )
-
-        ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
-
-        management_client = LiteMLClient(
-            subscription_id=ws_triad.subscription_id,
-            resource_group=ws_triad.resource_group_name,
-            logger=self.logger,
-            credential=azure_ai_project.get("credential")
-        )
-
-        tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
-
-        run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
-        self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
-        eval_run = EvalRun(
-            run_name=run_display_name,
-            tracking_uri=cast(str, tracking_uri),
-            subscription_id=ws_triad.subscription_id,
-            group_name=ws_triad.resource_group_name,
-            workspace_name=ws_triad.workspace_name,
-            management_client=management_client, # type: ignore
-        )
-        eval_run._start_run()
-        self.logger.debug(f"MLFlow run started successfully with ID: {eval_run.info.run_id}")
 
-
-
-
-
+            self.ai_studio_url = response.properties.get("AiStudioEvaluationUri")
+
+            return response
+
+        else:
+            trace_destination = _trace_destination_from_project_scope(azure_ai_project)
+            if not trace_destination:
+                self.logger.warning("Could not determine trace destination from project scope")
+                raise EvaluationException(
+                    message="Could not determine trace destination",
+                    blame=ErrorBlame.SYSTEM_ERROR,
+                    category=ErrorCategory.UNKNOWN,
+                    target=ErrorTarget.RED_TEAM
+                )
+
+            ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
+
+            management_client = LiteMLClient(
+                subscription_id=ws_triad.subscription_id,
+                resource_group=ws_triad.resource_group_name,
+                logger=self.logger,
+                credential=azure_ai_project.get("credential")
+            )
+
+            tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
+
+            run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+            self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
+            eval_run = EvalRun(
+                run_name=run_display_name,
+                tracking_uri=cast(str, tracking_uri),
+                subscription_id=ws_triad.subscription_id,
+                group_name=ws_triad.resource_group_name,
+                workspace_name=ws_triad.workspace_name,
+                management_client=management_client, # type: ignore
+            )
+            eval_run._start_run()
+            self.logger.debug(f"MLFlow run started successfully with ID: {eval_run.info.run_id}")
+
+            self.trace_destination = trace_destination
+            self.logger.debug(f"MLFlow run created successfully with ID: {eval_run}")
+
+            self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination,
+                                                    evaluation_id=eval_run.info.run_id)
+
+            return eval_run
 
 
     async def _log_redteam_results_to_mlflow(
@@ -343,58 +365,59 @@ class RedTeam():
         """
         self.logger.debug(f"Logging results to MLFlow, _skip_evals={_skip_evals}")
         artifact_name = "instance_results.json"
+        eval_info_name = "redteam_info.json"
+        properties = {}
 
         # If we have a scan output directory, save the results there first
-
-
-        self
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmpdir:
+            if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                artifact_path = os.path.join(self.scan_output_dir, artifact_name)
+                self.logger.debug(f"Saving artifact to scan output directory: {artifact_path}")
+                with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                    if _skip_evals:
+                        # In _skip_evals mode, we write the conversations in conversation/messages format
+                        f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
+                    elif redteam_result.scan_result:
+                        # Create a copy to avoid modifying the original scan result
+                        result_with_conversations = redteam_result.scan_result.copy() if isinstance(redteam_result.scan_result, dict) else {}
+
+                        # Preserve all original fields needed for scorecard generation
+                        result_with_conversations["scorecard"] = result_with_conversations.get("scorecard", {})
+                        result_with_conversations["parameters"] = result_with_conversations.get("parameters", {})
+
+                        # Add conversations field with all conversation data including user messages
+                        result_with_conversations["conversations"] = redteam_result.attack_details or []
+
+                        # Keep original attack_details field to preserve compatibility with existing code
+                        if "attack_details" not in result_with_conversations and redteam_result.attack_details is not None:
+                            result_with_conversations["attack_details"] = redteam_result.attack_details
+
+                        json.dump(result_with_conversations, f)
+
+                eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
+                self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
+                with open(eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                    # Remove evaluation_result from red_team_info before logging
+                    red_team_info_logged = {}
+                    for strategy, harms_dict in self.red_team_info.items():
+                        red_team_info_logged[strategy] = {}
+                        for harm, info_dict in harms_dict.items():
+                            info_dict.pop("evaluation_result", None)
+                            red_team_info_logged[strategy][harm] = info_dict
+                    f.write(json.dumps(red_team_info_logged))
+
+                # Also save a human-readable scorecard if available
+                if not _skip_evals and redteam_result.scan_result:
+                    scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
+                    with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                        f.write(self._to_scorecard(redteam_result.scan_result))
+                    self.logger.debug(f"Saved scorecard to: {scorecard_path}")
 
-
-
-            eval_info_name = "redteam_info.json"
-            eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
-            self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
-            with open(eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                # Remove evaluation_result from red_team_info before logging
-                red_team_info_logged = {}
-                for strategy, harms_dict in self.red_team_info.items():
-                    red_team_info_logged[strategy] = {}
-                    for harm, info_dict in harms_dict.items():
-                        info_dict.pop("evaluation_result", None)
-                        red_team_info_logged[strategy][harm] = info_dict
-                f.write(json.dumps(red_team_info_logged))
-
-            # Also save a human-readable scorecard if available
-            if not _skip_evals and redteam_result.scan_result:
-                scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
-                with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                    f.write(self._to_scorecard(redteam_result.scan_result))
-                self.logger.debug(f"Saved scorecard to: {scorecard_path}")
+            # Create a dedicated artifacts directory with proper structure for MLFlow
+            # MLFlow requires the artifact_name file to be in the directory we're logging
 
-
-            # MLFlow requires the artifact_name file to be in the directory we're logging
-
-            import tempfile
-            with tempfile.TemporaryDirectory() as tmpdir:
-                # First, create the main artifact file that MLFlow expects
+            # First, create the main artifact file that MLFlow expects
             with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
                 if _skip_evals:
                     f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
@@ -425,51 +448,89 @@ class RedTeam():
                         self.logger.warning(f"Failed to copy file {file} to artifact directory: {str(e)}")
 
                 # Log the entire directory to MLFlow
-                try:
-
-
-
-                except Exception as e:
-
-
-
-
-
-                    self.logger.debug("Logged scan_output_dir property to MLFlow")
-                except Exception as e:
-                    self.logger.warning(f"Failed to log scan_output_dir property to MLFlow: {str(e)}")
-        else:
-            # Use temporary directory as before if no scan output directory exists
-            with tempfile.TemporaryDirectory() as tmpdir:
+                # try:
+                #     eval_run.log_artifact(tmpdir, artifact_name)
+                #     eval_run.log_artifact(tmpdir, eval_info_name)
+                #     self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
+                # except Exception as e:
+                #     self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
+
+                properties.update({"scan_output_dir": str(self.scan_output_dir)})
+            else:
+                # Use temporary directory as before if no scan output directory exists
                 artifact_file = Path(tmpdir) / artifact_name
                 with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
                     if _skip_evals:
                         f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
                     elif redteam_result.scan_result:
                         json.dump(redteam_result.scan_result, f)
-                eval_run.log_artifact(tmpdir, artifact_name)
+                # eval_run.log_artifact(tmpdir, artifact_name)
                 self.logger.debug(f"Logged artifact: {artifact_name}")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            properties.update({
+                "redteaming": "asr", # Red team agent specific run properties to help UI identify this as a redteaming run
+                EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
+            })
+
+            metrics = {}
+            if redteam_result.scan_result:
+                scorecard = redteam_result.scan_result["scorecard"]
+                joint_attack_summary = scorecard["joint_risk_attack_summary"]
+
+                if joint_attack_summary:
+                    for risk_category_summary in joint_attack_summary:
+                        risk_category = risk_category_summary.get("risk_category").lower()
+                        for key, value in risk_category_summary.items():
+                            if key != "risk_category":
+                                metrics.update({
+                                    f"{risk_category}_{key}": cast(float, value)
+                                })
+                                # eval_run.log_metric(f"{risk_category}_{key}", cast(float, value))
+                                self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
+
+            if self._one_dp_project:
+                try:
+                    create_evaluation_result_response = self.generated_rai_client._evaluation_onedp_client.create_evaluation_result(
+                        name=uuid.uuid4(),
+                        path=tmpdir,
+                        metrics=metrics,
+                        result_type=ResultType.REDTEAM
+                    )
+
+                    update_run_response = self.generated_rai_client._evaluation_onedp_client.update_red_team_run(
+                        name=eval_run.id,
+                        red_team=RedTeamUpload(
+                            id=eval_run.id,
+                            scan_name=eval_run.scan_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
+                            status="Completed",
+                            outputs={
+                                'evaluationResultId': create_evaluation_result_response.id,
+                            },
+                            properties=properties,
+                        )
+                    )
+                    self.logger.debug(f"Updated UploadRun: {update_run_response.id}")
+                except Exception as e:
+                    self.logger.warning(f"Failed to upload red team results to AI Foundry: {str(e)}")
+            else:
+                # Log the entire directory to MLFlow
+                try:
+                    eval_run.log_artifact(tmpdir, artifact_name)
+                    if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                        eval_run.log_artifact(tmpdir, eval_info_name)
+                    self.logger.debug(f"Successfully logged artifacts directory to AI Foundry")
+                except Exception as e:
+                    self.logger.warning(f"Failed to log artifacts to AI Foundry: {str(e)}")
+
+                for k,v in metrics.items():
+                    eval_run.log_metric(k, v)
+                    self.logger.debug(f"Logged metric: {k} = {v}")
+
+                eval_run.write_properties_to_run_history(properties)
+
+                eval_run._end_run("FINISHED")
+
+        self.logger.info("Successfully logged results to AI Foundry")
         return None
 
     # Using the utility function from strategy_utils.py instead
@@ -801,9 +862,11 @@ class RedTeam():
         chat_target: PromptChatTarget,
         all_prompts: List[str],
         converter: Union[PromptConverter, List[PromptConverter]],
+        *,
         strategy_name: str = "unknown",
-
-
+        risk_category_name: str = "unknown",
+        risk_category: Optional[RiskCategory] = None,
+        timeout: int = 120,
     ) -> Orchestrator:
         """Send prompts via the PromptSendingOrchestrator with optimized performance.
 
@@ -821,6 +884,8 @@ class RedTeam():
         :type converter: Union[PromptConverter, List[PromptConverter]]
         :param strategy_name: Name of the attack strategy being used
         :type strategy_name: str
+        :param risk_category_name: Name of the risk category being evaluated
+        :type risk_category_name: str
         :param risk_category: Risk category being evaluated
         :type risk_category: str
         :param timeout: Timeout in seconds for each prompt
@@ -828,10 +893,10 @@ class RedTeam():
         :return: Configured and initialized orchestrator
         :rtype: Orchestrator
         """
-        task_key = f"{strategy_name}_{
+        task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
         self.task_statuses[task_key] = TASK_STATUS["RUNNING"]
 
-        log_strategy_start(self.logger, strategy_name,
+        log_strategy_start(self.logger, strategy_name, risk_category_name)
 
         # Create converter list from single converter or list of converters
         converter_list = [converter] if converter and isinstance(converter, PromptConverter) else converter if converter else []
@@ -854,7 +919,7 @@ class RedTeam():
             )
 
             if not all_prompts:
-                self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{
+                self.logger.warning(f"No prompts provided to orchestrator for {strategy_name}/{risk_category_name}")
                 self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
                 return orchestrator
 
@@ -874,15 +939,15 @@ class RedTeam():
            else:
                output_path = f"{base_path}{DATA_EXT}"
 
-            self.red_team_info[strategy_name][
+            self.red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
 
            # Process prompts concurrently within each batch
            if len(all_prompts) > batch_size:
-                self.logger.debug(f"Processing {len(all_prompts)} prompts in batches of {batch_size} for {strategy_name}/{
+                self.logger.debug(f"Processing {len(all_prompts)} prompts in batches of {batch_size} for {strategy_name}/{risk_category_name}")
                batches = [all_prompts[i:i + batch_size] for i in range(0, len(all_prompts), batch_size)]
 
                for batch_idx, batch in enumerate(batches):
-                    self.logger.debug(f"Processing batch {batch_idx+1}/{len(batches)} with {len(batch)} prompts for {strategy_name}/{
+                    self.logger.debug(f"Processing batch {batch_idx+1}/{len(batches)} with {len(batch)} prompts for {strategy_name}/{risk_category_name}")
 
                    batch_start_time = datetime.now() # Send prompts in the batch concurrently with a timeout and retry logic
                    try: # Create retry decorator for this specific call with enhanced retry strategy
@@ -897,7 +962,7 @@ class RedTeam():
                                    ConnectionError, TimeoutError, asyncio.TimeoutError, httpcore.ReadTimeout,
                                    httpx.HTTPStatusError) as e:
                                # Log the error with enhanced information and allow retry logic to handle it
-                                self.logger.warning(f"Network error in batch {batch_idx+1} for {strategy_name}/{
+                                self.logger.warning(f"Network error in batch {batch_idx+1} for {strategy_name}/{risk_category_name}: {type(e).__name__}: {str(e)}")
                                # Add a small delay before retry to allow network recovery
                                await asyncio.sleep(1)
                                raise
@@ -905,32 +970,32 @@ class RedTeam():
                        # Execute the retry-enabled function
                        await send_batch_with_retry()
                        batch_duration = (datetime.now() - batch_start_time).total_seconds()
-                        self.logger.debug(f"Successfully processed batch {batch_idx+1} for {strategy_name}/{
+                        self.logger.debug(f"Successfully processed batch {batch_idx+1} for {strategy_name}/{risk_category_name} in {batch_duration:.2f} seconds")
 
                        # Print progress to console
                        if batch_idx < len(batches) - 1: # Don't print for the last batch
-                            print(f"Strategy {strategy_name}, Risk {
+                            print(f"Strategy {strategy_name}, Risk {risk_category_name}: Processed batch {batch_idx+1}/{len(batches)}")
 
                    except (asyncio.TimeoutError, tenacity.RetryError):
-                        self.logger.warning(f"Batch {batch_idx+1} for {strategy_name}/{
-                        self.logger.debug(f"Timeout: Strategy {strategy_name}, Risk {
-                        print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {
+                        self.logger.warning(f"Batch {batch_idx+1} for {strategy_name}/{risk_category_name} timed out after {timeout} seconds, continuing with partial results")
+                        self.logger.debug(f"Timeout: Strategy {strategy_name}, Risk {risk_category_name}, Batch {batch_idx+1} after {timeout} seconds.", exc_info=True)
+                        print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Batch {batch_idx+1}")
                        # Set task status to TIMEOUT
-                        batch_task_key = f"{strategy_name}_{
+                        batch_task_key = f"{strategy_name}_{risk_category_name}_batch_{batch_idx+1}"
                        self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
-                        self.red_team_info[strategy_name][
-                        self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=
+                        self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                        self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=batch_idx+1)
                        # Continue with partial results rather than failing completely
                        continue
                    except Exception as e:
-                        log_error(self.logger, f"Error processing batch {batch_idx+1}", e, f"{strategy_name}/{
-                        self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {
-                        self.red_team_info[strategy_name][
-                        self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=
+                        log_error(self.logger, f"Error processing batch {batch_idx+1}", e, f"{strategy_name}/{risk_category_name}")
+                        self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category_name}, Batch {batch_idx+1}: {str(e)}")
+                        self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                        self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=batch_idx+1)
                        # Continue with other batches even if one fails
                        continue
            else: # Small number of prompts, process all at once with a timeout and retry logic
-                self.logger.debug(f"Processing {len(all_prompts)} prompts in a single batch for {strategy_name}/{
+                self.logger.debug(f"Processing {len(all_prompts)} prompts in a single batch for {strategy_name}/{risk_category_name}")
                batch_start_time = datetime.now()
                try: # Create retry decorator with enhanced retry strategy
                    @retry(**self._create_retry_config()["network_retry"])
@@ -944,7 +1009,7 @@ class RedTeam():
                            ConnectionError, TimeoutError, OSError, asyncio.TimeoutError, httpcore.ReadTimeout,
                            httpx.HTTPStatusError) as e:
                        # Enhanced error logging with type information and context
-                        self.logger.warning(f"Network error in single batch for {strategy_name}/{
+                        self.logger.warning(f"Network error in single batch for {strategy_name}/{risk_category_name}: {type(e).__name__}: {str(e)}")
                        # Add a small delay before retry to allow network recovery
                        await asyncio.sleep(2)
                        raise
@@ -952,30 +1017,338 @@ class RedTeam():
                    # Execute the retry-enabled function
                    await send_all_with_retry()
                    batch_duration = (datetime.now() - batch_start_time).total_seconds()
-                    self.logger.debug(f"Successfully processed single batch for {strategy_name}/{
+                    self.logger.debug(f"Successfully processed single batch for {strategy_name}/{risk_category_name} in {batch_duration:.2f} seconds")
                except (asyncio.TimeoutError, tenacity.RetryError):
-                    self.logger.warning(f"Prompt processing for {strategy_name}/{
-                    print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {
+                    self.logger.warning(f"Prompt processing for {strategy_name}/{risk_category_name} timed out after {timeout} seconds, continuing with partial results")
+                    print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}")
                    # Set task status to TIMEOUT
-                    single_batch_task_key = f"{strategy_name}_{
+                    single_batch_task_key = f"{strategy_name}_{risk_category_name}_single_batch"
                    self.task_statuses[single_batch_task_key] = TASK_STATUS["TIMEOUT"]
-                    self.red_team_info[strategy_name][
-                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=
+                    self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=1)
                except Exception as e:
-                    log_error(self.logger, "Error processing prompts", e, f"{strategy_name}/{
-                    self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {
-                    self.red_team_info[strategy_name][
-                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=
+                    log_error(self.logger, "Error processing prompts", e, f"{strategy_name}/{risk_category_name}")
+                    self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category_name}: {str(e)}")
+                    self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=1)
 
            self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
            return orchestrator
 
        except Exception as e:
-            log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{
-            self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{
+            log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{risk_category_name}")
+            self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category_name}: {str(e)}")
            self.task_statuses[task_key] = TASK_STATUS["FAILED"]
            raise
 
+    async def _multi_turn_orchestrator(
+        self,
+        chat_target: PromptChatTarget,
+        all_prompts: List[str],
+        converter: Union[PromptConverter, List[PromptConverter]],
+        *,
+        strategy_name: str = "unknown",
+        risk_category_name: str = "unknown",
+        risk_category: Optional[RiskCategory] = None,
+        timeout: int = 120,
+    ) -> Orchestrator:
+        """Send prompts via the RedTeamingOrchestrator, the simplest form of MultiTurnOrchestrator, with optimized performance.
+
+        Creates and configures a PyRIT RedTeamingOrchestrator to efficiently send prompts to the target
+        model or function. The orchestrator handles prompt conversion using the specified converters,
+        applies appropriate timeout settings, and manages the database engine for storing conversation
+        results. This function provides centralized management for prompt-sending operations with proper
+        error handling and performance optimizations.
+
+        :param chat_target: The target to send prompts to
+        :type chat_target: PromptChatTarget
+        :param all_prompts: List of prompts to process and send
+        :type all_prompts: List[str]
+        :param converter: Prompt converter or list of converters to transform prompts
+        :type converter: Union[PromptConverter, List[PromptConverter]]
+        :param strategy_name: Name of the attack strategy being used
+        :type strategy_name: str
+        :param risk_category: Risk category being evaluated
+        :type risk_category: str
+        :param timeout: Timeout in seconds for each prompt
+        :type timeout: int
+        :return: Configured and initialized orchestrator
+        :rtype: Orchestrator
+        """
+        max_turns = 5 # Set a default max turns value
+        task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
+        self.task_statuses[task_key] = TASK_STATUS["RUNNING"]
+
+        log_strategy_start(self.logger, strategy_name, risk_category_name)
+        converter_list = []
+        # Create converter list from single converter or list of converters
+        if converter and isinstance(converter, PromptConverter):
+            converter_list = [converter]
+        elif converter and isinstance(converter, list):
+            # Filter out None values from the converter list
+            converter_list = [c for c in converter if c is not None]
+
+        # Log which converter is being used
+        if converter_list:
+            if isinstance(converter_list, list) and len(converter_list) > 0:
+                converter_names = [c.__class__.__name__ for c in converter_list if c is not None]
+                self.logger.debug(f"Using converters: {', '.join(converter_names)}")
+            elif converter is not None:
+                self.logger.debug(f"Using converter: {converter.__class__.__name__}")
+        else:
+            self.logger.debug("No converters specified")
+
+        for prompt_idx, prompt in enumerate(all_prompts):
+            prompt_start_time = datetime.now()
+            self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
+            try:
+                azure_rai_service_scorer = AzureRAIServiceTrueFalseScorer(
+                    client=self.generated_rai_client,
+                    api_version=None,
+                    model="gpt-4",
+                    # objective=prompt,
+                    logger=self.logger,
+                    credential=self.credential,
+                    risk_category=risk_category,
+                    azure_ai_project=self.azure_ai_project,
+                )
+
+                azure_rai_service_target = AzureRAIServiceTarget(
+                    client=self.generated_rai_client,
+                    api_version=None,
+                    model="gpt-4",
+                    prompt_template_key="orchestrators/red_teaming/text_generation.yaml",
+                    objective=prompt,
+                    logger=self.logger,
+                    is_one_dp_project=self._one_dp_project,
+                )
+
+                orchestrator = RedTeamingOrchestrator(
+                    objective_target=chat_target,
+                    adversarial_chat=azure_rai_service_target,
+                    # adversarial_chat_seed_prompt=prompt,
+                    max_turns=max_turns,
+                    prompt_converters=converter_list,
+                    objective_scorer=azure_rai_service_scorer,
+                    use_score_as_feedback=False,
+                )
+
+                # Debug log the first few characters of the current prompt
+                self.logger.debug(f"Current prompt (truncated): {prompt[:50]}...")
+
+                # Initialize output path for memory labelling
+                base_path = str(uuid.uuid4())
+
+                # If scan output directory exists, place the file there
+                if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                    output_path = os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
+                else:
+                    output_path = f"{base_path}{DATA_EXT}"
+
+                self.red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
+
+                try: # Create retry decorator for this specific call with enhanced retry strategy
+                    @retry(**self._create_retry_config()["network_retry"])
+                    async def send_prompt_with_retry():
+                        try:
+                            return await asyncio.wait_for(
+                                orchestrator.run_attack_async(objective=prompt, memory_labels={"risk_strategy_path": output_path, "batch": 1}),
+                                timeout=timeout # Use provided timeouts
+                            )
+                        except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError, httpx.HTTPError,
+                                ConnectionError, TimeoutError, asyncio.TimeoutError, httpcore.ReadTimeout,
+                                httpx.HTTPStatusError) as e:
+                            # Log the error with enhanced information and allow retry logic to handle it
+                            self.logger.warning(f"Network error in prompt {prompt_idx+1} for {strategy_name}/{risk_category_name}: {type(e).__name__}: {str(e)}")
+                            # Add a small delay before retry to allow network recovery
+                            await asyncio.sleep(1)
+                            raise
+
+                    # Execute the retry-enabled function
+                    await send_prompt_with_retry()
+                    prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
+                    self.logger.debug(f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds")
+
+                    # Print progress to console
+                    if prompt_idx < len(all_prompts) - 1: # Don't print for the last prompt
+                        print(f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{len(all_prompts)}")
+
+                except (asyncio.TimeoutError, tenacity.RetryError):
+                    self.logger.warning(f"Batch {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {timeout} seconds, continuing with partial results")
+                    self.logger.debug(f"Timeout: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1} after {timeout} seconds.", exc_info=True)
+                    print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1}")
+                    # Set task status to TIMEOUT
+                    batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
+                    self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                    self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=1)
+                    # Continue with partial results rather than failing completely
+                    continue
+                except Exception as e:
+                    log_error(self.logger, f"Error processing prompt {prompt_idx+1}", e, f"{strategy_name}/{risk_category_name}")
+                    self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category_name}, Prompt {prompt_idx+1}: {str(e)}")
+                    self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=1)
+                    # Continue with other batches even if one fails
+                    continue
+            except Exception as e:
+                log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{risk_category_name}")
+                self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category_name}: {str(e)}")
+                self.task_statuses[task_key] = TASK_STATUS["FAILED"]
+                raise
+        self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
+        return orchestrator
+
+    async def _crescendo_orchestrator(
+        self,
+        chat_target: PromptChatTarget,
+        all_prompts: List[str],
+        converter: Union[PromptConverter, List[PromptConverter]],
+        *,
+        strategy_name: str = "unknown",
+        risk_category_name: str = "unknown",
+        risk_category: Optional[RiskCategory] = None,
+        timeout: int = 120,
+    ) -> Orchestrator:
+        """Send prompts via the CrescendoOrchestrator with optimized performance.
+
+        Creates and configures a PyRIT CrescendoOrchestrator to send prompts to the target
+        model or function. The orchestrator handles prompt conversion using the specified converters,
+        applies appropriate timeout settings, and manages the database engine for storing conversation
+        results. This function provides centralized management for prompt-sending operations with proper
+        error handling and performance optimizations.
+
+        :param chat_target: The target to send prompts to
+        :type chat_target: PromptChatTarget
+        :param all_prompts: List of prompts to process and send
+        :type all_prompts: List[str]
+        :param converter: Prompt converter or list of converters to transform prompts
+        :type converter: Union[PromptConverter, List[PromptConverter]]
+        :param strategy_name: Name of the attack strategy being used
+        :type strategy_name: str
+        :param risk_category: Risk category being evaluated
+        :type risk_category: str
+        :param timeout: Timeout in seconds for each prompt
+        :type timeout: int
+        :return: Configured and initialized orchestrator
+        :rtype: Orchestrator
+        """
+        max_turns = 10 # Set a default max turns value
+        max_backtracks = 5
+        task_key = f"{strategy_name}_{risk_category_name}_orchestrator"
+        self.task_statuses[task_key] = TASK_STATUS["RUNNING"]
+
+        log_strategy_start(self.logger, strategy_name, risk_category_name)
+
+        # Initialize output path for memory labelling
+        base_path = str(uuid.uuid4())
+
+        # If scan output directory exists, place the file there
+        if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+            output_path = os.path.join(self.scan_output_dir, f"{base_path}{DATA_EXT}")
+        else:
+            output_path = f"{base_path}{DATA_EXT}"
+
+        self.red_team_info[strategy_name][risk_category_name]["data_file"] = output_path
+
+        for prompt_idx, prompt in enumerate(all_prompts):
+            prompt_start_time = datetime.now()
+            self.logger.debug(f"Processing prompt {prompt_idx+1}/{len(all_prompts)}")
+            try:
+                red_llm_scoring_target = RAIServiceEvalChatTarget(
+                    logger=self.logger,
+                    credential=self.credential,
+                    risk_category=risk_category,
+                    azure_ai_project=self.azure_ai_project,
+                )
+
+                azure_rai_service_target = AzureRAIServiceTarget(
+                    client=self.generated_rai_client,
+                    api_version=None,
+                    model="gpt-4",
+                    prompt_template_key="orchestrators/crescendo/crescendo_variant_1.yaml",
+                    objective=prompt,
+                    logger=self.logger,
+                    crescendo_format=True,
+                    is_one_dp_project=self._one_dp_project,
+                )
+
+                orchestrator = CrescendoOrchestrator(
+                    objective_target=chat_target,
+                    adversarial_chat=azure_rai_service_target,
+                    max_turns=max_turns,
+                    scoring_target=red_llm_scoring_target,
+                    max_backtracks=max_backtracks,
+                )
+
+                orchestrator._objective_scorer = AzureRAIServiceTrueFalseScorer(
+                    client=self.generated_rai_client,
+                    api_version=None,
+                    model="gpt-4",
+                    # objective=prompt,
+                    logger=self.logger,
+                    credential=self.credential,
+                    risk_category=risk_category,
+                    azure_ai_project=self.azure_ai_project,
+                )
+
+                # Debug log the first few characters of the current prompt
+                self.logger.debug(f"Current prompt (truncated): {prompt[:50]}...")
+
+                try: # Create retry decorator for this specific call with enhanced retry strategy
+                    @retry(**self._create_retry_config()["network_retry"])
+                    async def send_prompt_with_retry():
+                        try:
+                            return await asyncio.wait_for(
+                                orchestrator.run_attack_async(objective=prompt, memory_labels={"risk_strategy_path": output_path, "batch": prompt_idx+1}),
+                                timeout=timeout # Use provided timeouts
+                            )
+                        except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.ConnectError, httpx.HTTPError,
+                                ConnectionError, TimeoutError, asyncio.TimeoutError, httpcore.ReadTimeout,
+                                httpx.HTTPStatusError) as e:
+                            # Log the error with enhanced information and allow retry logic to handle it
+                            self.logger.warning(f"Network error in prompt {prompt_idx+1} for {strategy_name}/{risk_category_name}: {type(e).__name__}: {str(e)}")
+                            # Add a small delay before retry to allow network recovery
+                            await asyncio.sleep(1)
+                            raise
+
+                    # Execute the retry-enabled function
+                    await send_prompt_with_retry()
+                    prompt_duration = (datetime.now() - prompt_start_time).total_seconds()
+                    self.logger.debug(f"Successfully processed prompt {prompt_idx+1} for {strategy_name}/{risk_category_name} in {prompt_duration:.2f} seconds")
+
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=prompt_idx+1)
+
+                    # Print progress to console
+                    if prompt_idx < len(all_prompts) - 1: # Don't print for the last prompt
+                        print(f"Strategy {strategy_name}, Risk {risk_category_name}: Processed prompt {prompt_idx+1}/{len(all_prompts)}")
+
+                except (asyncio.TimeoutError, tenacity.RetryError):
+                    self.logger.warning(f"Batch {prompt_idx+1} for {strategy_name}/{risk_category_name} timed out after {timeout} seconds, continuing with partial results")
+                    self.logger.debug(f"Timeout: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1} after {timeout} seconds.", exc_info=True)
+                    print(f"⚠️ TIMEOUT: Strategy {strategy_name}, Risk {risk_category_name}, Batch {prompt_idx+1}")
+                    # Set task status to TIMEOUT
+                    batch_task_key = f"{strategy_name}_{risk_category_name}_prompt_{prompt_idx+1}"
+                    self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
+                    self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=prompt_idx+1)
+                    # Continue with partial results rather than failing completely
+                    continue
+                except Exception as e:
+                    log_error(self.logger, f"Error processing prompt {prompt_idx+1}", e, f"{strategy_name}/{risk_category_name}")
+                    self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category_name}, Prompt {prompt_idx+1}: {str(e)}")
+                    self.red_team_info[strategy_name][risk_category_name]["status"] = TASK_STATUS["INCOMPLETE"]
+                    self._write_pyrit_outputs_to_file(orchestrator=orchestrator, strategy_name=strategy_name, risk_category=risk_category_name, batch_idx=prompt_idx+1)
+                    # Continue with other batches even if one fails
+                    continue
+            except Exception as e:
+                log_error(self.logger, "Failed to initialize orchestrator", e, f"{strategy_name}/{risk_category_name}")
+                self.logger.debug(f"CRITICAL: Failed to create orchestrator for {strategy_name}/{risk_category_name}: {str(e)}")
+                self.task_statuses[task_key] = TASK_STATUS["FAILED"]
+                raise
+        self.task_statuses[task_key] = TASK_STATUS["COMPLETED"]
+        return orchestrator
+
     def _write_pyrit_outputs_to_file(self,*, orchestrator: Orchestrator, strategy_name: str, risk_category: str, batch_idx: Optional[int] = None) -> str:
         """Write PyRIT outputs to a file with a name based on orchestrator, strategy, and risk category.
 
@@ -1018,6 +1391,9 @@ class RedTeam():
|
|
|
1018
1391
|
#Convert to json lines
|
|
1019
1392
|
json_lines = ""
|
|
1020
1393
|
for conversation in conversations: # each conversation is a List[ChatMessage]
|
|
1394
|
+
if conversation[0].role == "system":
|
|
1395
|
+
# Skip system messages in the output
|
|
1396
|
+
continue
|
|
1021
1397
|
json_lines += json.dumps({"conversation": {"messages": [self._message_to_dict(message) for message in conversation]}}) + "\n"
|
|
1022
1398
|
with Path(output_path).open("w") as f:
|
|
1023
1399
|
f.writelines(json_lines)
|
|
@@ -1031,7 +1407,11 @@ class RedTeam():
|
|
|
1031
1407
|
self.logger.debug(f"Creating new file: {output_path}")
|
|
1032
1408
|
#Convert to json lines
|
|
1033
1409
|
json_lines = ""
|
|
1410
|
+
|
|
1034
1411
|
for conversation in conversations: # each conversation is a List[ChatMessage]
|
|
1412
|
+
if conversation[0].role == "system":
|
|
1413
|
+
# Skip system messages in the output
|
|
1414
|
+
continue
|
|
1035
1415
|
json_lines += json.dumps({"conversation": {"messages": [self._message_to_dict(message) for message in conversation]}}) + "\n"
|
|
1036
1416
|
with Path(output_path).open("w") as f:
|
|
1037
1417
|
f.writelines(json_lines)
|
|
@@ -1055,32 +1435,31 @@ class RedTeam():
         from ._utils.strategy_utils import get_chat_target
         return get_chat_target(target)

+
     # Replace with utility function
-    def
-        """Get appropriate orchestrator functions for the specified attack
+    def _get_orchestrator_for_attack_strategy(self, attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> Callable:
+        """Get appropriate orchestrator functions for the specified attack strategy.

-        Determines which orchestrator functions should be used based on the attack strategies.
+        Determines which orchestrator functions should be used based on the attack strategies, max turns.
         Returns a list of callable functions that can create orchestrators configured for the
         specified strategies. This function is crucial for mapping strategies to the appropriate
         execution environment.

         :param attack_strategy: List of attack strategies to get orchestrators for
-        :type attack_strategy:
+        :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
         :return: List of callable functions that create appropriately configured orchestrators
         :rtype: List[Callable]
         """
         # We need to modify this to use our actual _prompt_sending_orchestrator since the utility function can't access it
-
-
-
-
-        elif AttackStrategy.
-
-        elif AttackStrategy.
-
-
-        call_to_orchestrators.extend([self._prompt_sending_orchestrator])
-        return call_to_orchestrators
+        if isinstance(attack_strategy, list):
+            if AttackStrategy.MultiTurn in attack_strategy or AttackStrategy.Crescendo in attack_strategy:
+                self.logger.error("MultiTurn and Crescendo strategies are not supported in composed attacks.")
+                raise ValueError("MultiTurn and Crescendo strategies are not supported in composed attacks.")
+        elif AttackStrategy.MultiTurn == attack_strategy:
+            return self._multi_turn_orchestrator
+        elif AttackStrategy.Crescendo == attack_strategy:
+            return self._crescendo_orchestrator
+        return self._prompt_sending_orchestrator

     # Replace with utility function
     def _get_attack_success(self, result: str) -> bool:
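The rewritten selector above returns a single orchestrator callable instead of a list: MultiTurn and Crescendo get dedicated orchestrators, composed (list) strategies that include either of them are rejected, and everything else falls back to the prompt-sending orchestrator. A standalone sketch of that dispatch shape, using a hypothetical AttackStrategy enum and placeholder orchestrator functions rather than the package's own:

    from enum import Enum
    from typing import Callable, List, Union

    class AttackStrategy(Enum):
        # Illustrative subset of strategy names that appear in the diff.
        Baseline = "baseline"
        MultiTurn = "multi_turn"
        Crescendo = "crescendo"

    def _multi_turn_orchestrator() -> str:  # placeholder
        return "multi-turn orchestrator"

    def _crescendo_orchestrator() -> str:  # placeholder
        return "crescendo orchestrator"

    def _prompt_sending_orchestrator() -> str:  # placeholder
        return "prompt-sending orchestrator"

    def get_orchestrator_for_attack_strategy(
        attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
    ) -> Callable[[], str]:
        """Pick one orchestrator factory for a single or composed attack strategy."""
        if isinstance(attack_strategy, list):
            # Composed attacks cannot include the conversation-level strategies.
            if AttackStrategy.MultiTurn in attack_strategy or AttackStrategy.Crescendo in attack_strategy:
                raise ValueError("MultiTurn and Crescendo strategies are not supported in composed attacks.")
        elif attack_strategy == AttackStrategy.MultiTurn:
            return _multi_turn_orchestrator
        elif attack_strategy == AttackStrategy.Crescendo:
            return _crescendo_orchestrator
        return _prompt_sending_orchestrator

    # Example: a composed strategy without MultiTurn/Crescendo falls through to prompt sending.
    print(get_orchestrator_for_attack_strategy([AttackStrategy.Baseline])())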
@@ -1622,11 +2001,6 @@ class RedTeam():
         metric_name = get_metric_from_risk_category(risk_category)
         self.logger.debug(f"Using metric '{metric_name}' for risk category '{risk_category.value}'")

-        # Convert hate_unfairness to hate_fairness for RAI service compatibility
-        if metric_name == "hate_unfairness":
-            metric_name = "hate_fairness"
-            self.logger.debug(f"Converted metric name to '{metric_name}' for compatibility with RAI service")
-
         # Load all conversations from the data file
         conversations = []
         try:
@@ -1680,8 +2054,6 @@ class RedTeam():

     async def _process_attack(
         self,
-        target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
-        call_orchestrator: Callable,
         strategy: Union[AttackStrategy, List[AttackStrategy]],
         risk_category: RiskCategory,
         all_prompts: List[str],
@@ -1700,10 +2072,6 @@ class RedTeam():
         appropriate converter, saving results to files, and optionally evaluating the results.
         The function handles progress tracking, logging, and error handling throughout the process.

-        :param target: The target model or function to scan
-        :type target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget]
-        :param call_orchestrator: Function to call to create an orchestrator
-        :type call_orchestrator: Callable
         :param strategy: The attack strategy to use
         :type strategy: Union[AttackStrategy, List[AttackStrategy]]
         :param risk_category: The risk category to evaluate
@@ -1737,9 +2105,10 @@ class RedTeam():
         log_strategy_start(self.logger, strategy_name, risk_category.value)

         converter = self._get_converter_for_strategy(strategy)
+        call_orchestrator = self._get_orchestrator_for_attack_strategy(strategy)
         try:
             self.logger.debug(f"Calling orchestrator for {strategy_name} strategy")
-            orchestrator = await call_orchestrator(self.chat_target, all_prompts, converter, strategy_name, risk_category.value, timeout)
+            orchestrator = await call_orchestrator(chat_target=self.chat_target, all_prompts=all_prompts, converter=converter, strategy_name=strategy_name, risk_category=risk_category, risk_category_name=risk_category.value, timeout=timeout)
         except PyritException as e:
             log_error(self.logger, f"Error calling orchestrator for {strategy_name} strategy", e)
             self.logger.debug(f"Orchestrator error for {strategy_name}/{risk_category.value}: {str(e)}")
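With this change _process_attack no longer receives the orchestrator factory from its caller; it resolves one from the strategy and awaits it with keyword-only arguments. A small async sketch of that call shape, where resolve_orchestrator, run_attack, and the orchestrator coroutine are illustrative placeholders rather than the package's API:

    import asyncio
    from typing import Any, Callable, Coroutine, List

    async def prompt_sending_orchestrator(
        *, chat_target: Any, all_prompts: List[str], converter: Any,
        strategy_name: str, risk_category: Any, risk_category_name: str, timeout: int
    ) -> str:
        """Placeholder orchestrator coroutine with the keyword-only signature used after the change."""
        await asyncio.sleep(0)  # stand-in for the real orchestration work
        return f"{strategy_name}/{risk_category_name}: sent {len(all_prompts)} prompts (timeout={timeout}s)"

    def resolve_orchestrator(strategy_name: str) -> Callable[..., Coroutine[Any, Any, str]]:
        # In this sketch every strategy maps to the same placeholder orchestrator.
        return prompt_sending_orchestrator

    async def run_attack(strategy_name: str, risk_category_name: str, prompts: List[str]) -> str:
        call_orchestrator = resolve_orchestrator(strategy_name)
        # Keyword arguments keep the long call site self-documenting.
        return await call_orchestrator(
            chat_target=None, all_prompts=prompts, converter=None,
            strategy_name=strategy_name, risk_category=None,
            risk_category_name=risk_category_name, timeout=120,
        )

    print(asyncio.run(run_attack("base64", "violence", ["prompt-1", "prompt-2"])))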
@@ -1813,7 +2182,6 @@ class RedTeam():
         target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget],
         *,
         scan_name: Optional[str] = None,
-        num_turns : int = 1,
         attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]] = [],
         skip_upload: bool = False,
         output_path: Optional[Union[str, os.PathLike]] = None,
@@ -1830,8 +2198,6 @@ class RedTeam():
         :type target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration, PromptChatTarget]
         :param scan_name: Optional name for the evaluation
         :type scan_name: Optional[str]
-        :param num_turns: Number of conversation turns to use in the scan
-        :type num_turns: int
         :param attack_strategies: List of attack strategies to use
         :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
         :param skip_upload: Flag to determine if the scan results should be uploaded
@@ -1993,24 +2359,25 @@ class RedTeam():
         else:
             eval_run = self._start_redteam_mlflow_run(self.azure_ai_project, scan_name)

-            self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination, evaluation_id=eval_run.info.run_id)
             # Show URL for tracking progress
             print(f"🔗 Track your red team scan in AI Foundry: {self.ai_studio_url}")
-            self.logger.info(f"Started
+            self.logger.info(f"Started Uploading run: {self.ai_studio_url}")

         log_subsection_header(self.logger, "Setting up scan configuration")
         flattened_attack_strategies = self._get_flattened_attack_strategies(attack_strategies)
         self.logger.info(f"Using {len(flattened_attack_strategies)} attack strategies")
         self.logger.info(f"Found {len(flattened_attack_strategies)} attack strategies")
-
-
-
-
-
-
+
+        if len(flattened_attack_strategies) > 2 and (AttackStrategy.MultiTurn in flattened_attack_strategies or AttackStrategy.Crescendo in flattened_attack_strategies):
+            self.logger.warning("MultiTurn and Crescendo strategies are not compatible with multiple attack strategies.")
+            print("⚠️ Warning: MultiTurn and Crescendo strategies are not compatible with multiple attack strategies.")
+            raise ValueError("MultiTurn and Crescendo strategies are not compatible with multiple attack strategies.")
+
+        # Calculate total tasks: #risk_categories * #converters
+        self.total_tasks = len(self.risk_categories) * len(flattened_attack_strategies)
         # Show task count for user awareness
         print(f"📋 Planning {self.total_tasks} total tasks")
-        self.logger.info(f"Total tasks: {self.total_tasks} ({len(self.risk_categories)} risk categories * {len(flattened_attack_strategies)} strategies
+        self.logger.info(f"Total tasks: {self.total_tasks} ({len(self.risk_categories)} risk categories * {len(flattened_attack_strategies)} strategies)")

         # Initialize our tracking dictionary early with empty structures
         # This ensures we have a place to store results even if tasks fail
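Two guardrails are introduced here: once the strategy list is flattened, the conversation-level strategies (MultiTurn, Crescendo) cannot be mixed with several others, and the total task count is simply risk categories times strategies. A small sketch of the same validation and arithmetic, assuming a hypothetical AttackStrategy enum and a plain list of risk-category names:

    from enum import Enum
    from typing import List

    class AttackStrategy(Enum):
        Baseline = "baseline"
        Base64 = "base64"
        MultiTurn = "multi_turn"
        Crescendo = "crescendo"

    def plan_tasks(flattened_attack_strategies: List[AttackStrategy], risk_categories: List[str]) -> int:
        """Validate the strategy mix and return the number of strategy x risk-category tasks."""
        conversation_level = {AttackStrategy.MultiTurn, AttackStrategy.Crescendo}
        if len(flattened_attack_strategies) > 2 and conversation_level & set(flattened_attack_strategies):
            # Mirrors the diff's guard: these strategies cannot run alongside several others.
            raise ValueError("MultiTurn and Crescendo strategies are not compatible with multiple attack strategies.")
        return len(risk_categories) * len(flattened_attack_strategies)

    # Example: 2 strategies x 3 risk categories -> 6 tasks.
    print(plan_tasks([AttackStrategy.Baseline, AttackStrategy.Base64], ["violence", "sexual", "hate_unfairness"]))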
@@ -2096,9 +2463,9 @@ class RedTeam():

         # Create all tasks for parallel processing
         orchestrator_tasks = []
-        combinations = list(itertools.product(
+        combinations = list(itertools.product(flattened_attack_strategies, self.risk_categories))

-        for combo_idx, (
+        for combo_idx, (strategy, risk_category) in enumerate(combinations):
             strategy_name = self._get_strategy_name(strategy)
             objectives = all_objectives[strategy_name][risk_category.value]

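The task fan-out is now an explicit cross product of strategies and risk categories, enumerated so the debug log can report progress as combo_idx+1 of the total. A compact sketch of that pairing, using illustrative strings in place of the package's enums:

    import itertools

    flattened_attack_strategies = ["baseline", "base64"]
    risk_categories = ["violence", "hate_unfairness"]

    # One task per (strategy, risk category) pair, in a stable order.
    combinations = list(itertools.product(flattened_attack_strategies, risk_categories))

    for combo_idx, (strategy, risk_category) in enumerate(combinations):
        # Mirrors the debug log added in the diff: "[n/total] Creating task: strategy + risk".
        print(f"[{combo_idx + 1}/{len(combinations)}] Creating task: {strategy} + {risk_category}")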
@@ -2110,12 +2477,10 @@ class RedTeam():
                 progress_bar.update(1)
                 continue

-            self.logger.debug(f"[{combo_idx+1}/{len(combinations)}] Creating task: {
+            self.logger.debug(f"[{combo_idx+1}/{len(combinations)}] Creating task: {strategy_name} + {risk_category.value}")

             orchestrator_tasks.append(
                 self._process_attack(
-                    target=target,
-                    call_orchestrator=call_orchestrator,
                     all_prompts=objectives,
                     strategy=strategy,
                     progress_bar=progress_bar,
@@ -2210,7 +2575,7 @@ class RedTeam():
         )

         if not skip_upload:
-            self.logger.info("Logging results to
+            self.logger.info("Logging results to AI Foundry")
             await self._log_redteam_results_to_mlflow(
                 redteam_result=output,
                 eval_run=eval_run,