azure-ai-evaluation 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions.
- azure/ai/evaluation/__init__.py +1 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +1 -1
- azure/ai/evaluation/_aoai/label_grader.py +2 -2
- azure/ai/evaluation/_aoai/string_check_grader.py +2 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +2 -2
- azure/ai/evaluation/_common/__init__.py +3 -1
- azure/ai/evaluation/_common/evaluation_onedp_client.py +50 -5
- azure/ai/evaluation/_common/onedp/operations/_operations.py +1 -1
- azure/ai/evaluation/_common/rai_service.py +7 -6
- azure/ai/evaluation/_converters/_ai_services.py +162 -118
- azure/ai/evaluation/_converters/_models.py +76 -6
- azure/ai/evaluation/_eval_mapping.py +2 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +11 -13
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +24 -5
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +4 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +31 -29
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +10 -0
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +10 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +10 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +13 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +10 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +80 -10
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +26 -7
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +183 -128
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +3 -3
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +3 -3
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +2 -0
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +6 -5
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +26 -3
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +55 -55
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/red_team/_red_team.py

@@ -39,6 +39,7 @@ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarg
 from azure.ai.evaluation._common.math import list_mean_nan_safe, is_none_or_nan
 from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
 from azure.ai.evaluation import evaluate
+from azure.ai.evaluation._common import RedTeamUpload, ResultType
 
 # Azure Core imports
 from azure.core.credentials import TokenCredential
@@ -77,7 +78,7 @@ from ._utils.logging_utils import (
 )
 
 @experimental
-class RedTeam():
+class RedTeam:
     """
     This class uses various attack strategies to test the robustness of AI models against adversarial inputs.
     It logs the results of these evaluations and provides detailed scorecards summarizing the attack success rates.
@@ -215,11 +216,12 @@ class RedTeam():
         self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
         self.credential = credential
         self.output_dir = output_dir
-
+        self._one_dp_project = is_onedp_project(azure_ai_project)
+
         # Initialize logger without output directory (will be updated during scan)
         self.logger = setup_logger()
 
-        if not
+        if not self._one_dp_project:
             self.token_manager = ManagedIdentityAPITokenManager(
                 token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
                 logger=logging.getLogger("RedTeamLogger"),
@@ -276,52 +278,67 @@ class RedTeam():
         :raises EvaluationException: If no azure_ai_project is provided or trace destination cannot be determined
         """
         if not azure_ai_project:
-            log_error(self.logger, "No azure_ai_project provided, cannot
+            log_error(self.logger, "No azure_ai_project provided, cannot upload run")
             raise EvaluationException(
                 message="No azure_ai_project provided",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.MISSING_FIELD,
                 target=ErrorTarget.RED_TEAM
             )
-
-
-
-
-
-
-                blame=ErrorBlame.SYSTEM_ERROR,
-                category=ErrorCategory.UNKNOWN,
-                target=ErrorTarget.RED_TEAM
+
+        if self._one_dp_project:
+            response = self.generated_rai_client._evaluation_onedp_client.start_red_team_run(
+                red_team=RedTeamUpload(
+                    scan_name=run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
+                )
             )
-
-        ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
-
-        management_client = LiteMLClient(
-            subscription_id=ws_triad.subscription_id,
-            resource_group=ws_triad.resource_group_name,
-            logger=self.logger,
-            credential=azure_ai_project.get("credential")
-        )
-
-        tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
-
-        run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
-        self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
-        eval_run = EvalRun(
-            run_name=run_display_name,
-            tracking_uri=cast(str, tracking_uri),
-            subscription_id=ws_triad.subscription_id,
-            group_name=ws_triad.resource_group_name,
-            workspace_name=ws_triad.workspace_name,
-            management_client=management_client,  # type: ignore
-        )
-        eval_run._start_run()
-        self.logger.debug(f"MLFlow run started successfully with ID: {eval_run.info.run_id}")
 
-
-
-
-
+            self.ai_studio_url = response.properties.get("AiStudioEvaluationUri")
+
+            return response
+
+        else:
+            trace_destination = _trace_destination_from_project_scope(azure_ai_project)
+            if not trace_destination:
+                self.logger.warning("Could not determine trace destination from project scope")
+                raise EvaluationException(
+                    message="Could not determine trace destination",
+                    blame=ErrorBlame.SYSTEM_ERROR,
+                    category=ErrorCategory.UNKNOWN,
+                    target=ErrorTarget.RED_TEAM
+                )
+
+            ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
+
+            management_client = LiteMLClient(
+                subscription_id=ws_triad.subscription_id,
+                resource_group=ws_triad.resource_group_name,
+                logger=self.logger,
+                credential=azure_ai_project.get("credential")
+            )
+
+            tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
+
+            run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+            self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
+            eval_run = EvalRun(
+                run_name=run_display_name,
+                tracking_uri=cast(str, tracking_uri),
+                subscription_id=ws_triad.subscription_id,
+                group_name=ws_triad.resource_group_name,
+                workspace_name=ws_triad.workspace_name,
+                management_client=management_client,  # type: ignore
+            )
+            eval_run._start_run()
+            self.logger.debug(f"MLFlow run started successfully with ID: {eval_run.info.run_id}")
+
+            self.trace_destination = trace_destination
+            self.logger.debug(f"MLFlow run created successfully with ID: {eval_run}")
+
+            self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination,
+                                                    evaluation_id=eval_run.info.run_id)
+
+            return eval_run
 
 
     async def _log_redteam_results_to_mlflow(
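The run-creation path above now branches on the project form: a Foundry project endpoint string goes through the new OneDP client (`start_red_team_run`), while a hub-based project dictionary keeps the existing MLFlow `EvalRun` path. A minimal sketch of that branch condition, assuming `is_onedp_project` returns True for an endpoint URL and False for a subscription/resource-group/project dictionary (both input forms appear in the README changes further down; the concrete values are placeholders):

```python
from azure.ai.evaluation._common.utils import is_onedp_project

# Placeholder inputs for illustration only; the endpoint format follows the README example.
endpoint_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
hub_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}

# Assumption: the string endpoint takes the new OneDP upload path,
# the dictionary keeps the MLFlow-based path.
print(is_onedp_project(endpoint_project))  # expected: True
print(is_onedp_project(hub_project))       # expected: False
```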
@@ -343,58 +360,59 @@ class RedTeam():
         """
         self.logger.debug(f"Logging results to MLFlow, _skip_evals={_skip_evals}")
         artifact_name = "instance_results.json"
+        eval_info_name = "redteam_info.json"
+        properties = {}
 
         # If we have a scan output directory, save the results there first
-
-
-        self
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmpdir:
+            if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                artifact_path = os.path.join(self.scan_output_dir, artifact_name)
+                self.logger.debug(f"Saving artifact to scan output directory: {artifact_path}")
+                with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                    if _skip_evals:
+                        # In _skip_evals mode, we write the conversations in conversation/messages format
+                        f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
+                    elif redteam_result.scan_result:
+                        # Create a copy to avoid modifying the original scan result
+                        result_with_conversations = redteam_result.scan_result.copy() if isinstance(redteam_result.scan_result, dict) else {}
+
+                        # Preserve all original fields needed for scorecard generation
+                        result_with_conversations["scorecard"] = result_with_conversations.get("scorecard", {})
+                        result_with_conversations["parameters"] = result_with_conversations.get("parameters", {})
+
+                        # Add conversations field with all conversation data including user messages
+                        result_with_conversations["conversations"] = redteam_result.attack_details or []
+
+                        # Keep original attack_details field to preserve compatibility with existing code
+                        if "attack_details" not in result_with_conversations and redteam_result.attack_details is not None:
+                            result_with_conversations["attack_details"] = redteam_result.attack_details
+
+                        json.dump(result_with_conversations, f)
 
-
-
-
-
-
-
-
-
-
-
-
-                    f.write(json.dumps(red_team_info_logged))
-
-            # Also save a human-readable scorecard if available
-            if not _skip_evals and redteam_result.scan_result:
-                scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
-                with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                    f.write(self._to_scorecard(redteam_result.scan_result))
-                self.logger.debug(f"Saved scorecard to: {scorecard_path}")
+                eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
+                self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
+                with open(eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                    # Remove evaluation_result from red_team_info before logging
+                    red_team_info_logged = {}
+                    for strategy, harms_dict in self.red_team_info.items():
+                        red_team_info_logged[strategy] = {}
+                        for harm, info_dict in harms_dict.items():
+                            info_dict.pop("evaluation_result", None)
+                            red_team_info_logged[strategy][harm] = info_dict
+                    f.write(json.dumps(red_team_info_logged))
 
-
-
-
-
-
-
+                # Also save a human-readable scorecard if available
+                if not _skip_evals and redteam_result.scan_result:
+                    scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
+                    with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                        f.write(self._to_scorecard(redteam_result.scan_result))
+                    self.logger.debug(f"Saved scorecard to: {scorecard_path}")
+
+                # Create a dedicated artifacts directory with proper structure for MLFlow
+                # MLFlow requires the artifact_name file to be in the directory we're logging
+
+                # First, create the main artifact file that MLFlow expects
                 with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
                     if _skip_evals:
                         f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
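The new `redteam_info.json` artifact is written from `self.red_team_info` with the bulky `evaluation_result` entries stripped out. A self-contained sketch of that sanitization step; the strategy/harm keys and file name below are invented purely for illustration:

```python
import json

# Hypothetical nested structure mirroring red_team_info (strategy -> harm -> info dict).
red_team_info = {
    "base64": {
        "violence": {"data_file": "violence_base64.jsonl", "evaluation_result": {"rows": ["..."]}},
    },
}

red_team_info_logged = {}
for strategy, harms_dict in red_team_info.items():
    red_team_info_logged[strategy] = {}
    for harm, info_dict in harms_dict.items():
        info_dict.pop("evaluation_result", None)  # drop the large evaluation payload
        red_team_info_logged[strategy][harm] = info_dict

print(json.dumps(red_team_info_logged))
# {"base64": {"violence": {"data_file": "violence_base64.jsonl"}}}
```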
@@ -425,51 +443,89 @@ class RedTeam():
                         self.logger.warning(f"Failed to copy file {file} to artifact directory: {str(e)}")
 
                # Log the entire directory to MLFlow
-                try:
-
-
-
-                except Exception as e:
-
-
-
-
-
-                    self.logger.debug("Logged scan_output_dir property to MLFlow")
-                except Exception as e:
-                    self.logger.warning(f"Failed to log scan_output_dir property to MLFlow: {str(e)}")
-            else:
-                # Use temporary directory as before if no scan output directory exists
-                with tempfile.TemporaryDirectory() as tmpdir:
+                # try:
+                #     eval_run.log_artifact(tmpdir, artifact_name)
+                #     eval_run.log_artifact(tmpdir, eval_info_name)
+                #     self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
+                # except Exception as e:
+                #     self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
+
+                properties.update({"scan_output_dir": str(self.scan_output_dir)})
+            else:
+                # Use temporary directory as before if no scan output directory exists
                 artifact_file = Path(tmpdir) / artifact_name
                 with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
                     if _skip_evals:
                         f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
                     elif redteam_result.scan_result:
                         json.dump(redteam_result.scan_result, f)
-                eval_run.log_artifact(tmpdir, artifact_name)
+                # eval_run.log_artifact(tmpdir, artifact_name)
                 self.logger.debug(f"Logged artifact: {artifact_name}")
 
-
-
-
-
-
-
+            properties.update({
+                "redteaming": "asr", # Red team agent specific run properties to help UI identify this as a redteaming run
+                EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
+            })
+
+            metrics = {}
+            if redteam_result.scan_result:
+                scorecard = redteam_result.scan_result["scorecard"]
+                joint_attack_summary = scorecard["joint_risk_attack_summary"]
+
+                if joint_attack_summary:
+                    for risk_category_summary in joint_attack_summary:
+                        risk_category = risk_category_summary.get("risk_category").lower()
+                        for key, value in risk_category_summary.items():
+                            if key != "risk_category":
+                                metrics.update({
+                                    f"{risk_category}_{key}": cast(float, value)
+                                })
+                                # eval_run.log_metric(f"{risk_category}_{key}", cast(float, value))
+                                self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
+
+            if self._one_dp_project:
+                try:
+                    create_evaluation_result_response = self.generated_rai_client._evaluation_onedp_client.create_evaluation_result(
+                        name=uuid.uuid4(),
+                        path=tmpdir,
+                        metrics=metrics,
+                        result_type=ResultType.REDTEAM
+                    )
+
+                    update_run_response = self.generated_rai_client._evaluation_onedp_client.update_red_team_run(
+                        name=eval_run.id,
+                        red_team=RedTeamUpload(
+                            id=eval_run.id,
+                            scan_name=eval_run.scan_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
+                            status="Completed",
+                            outputs={
+                                'evaluationResultId': create_evaluation_result_response.id,
+                            },
+                            properties=properties,
+                        )
+                    )
+                    self.logger.debug(f"Updated UploadRun: {update_run_response.id}")
+                except Exception as e:
+                    self.logger.warning(f"Failed to upload red team results to AI Foundry: {str(e)}")
+            else:
+                # Log the entire directory to MLFlow
+                try:
+                    eval_run.log_artifact(tmpdir, artifact_name)
+                    if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                        eval_run.log_artifact(tmpdir, eval_info_name)
+                    self.logger.debug(f"Successfully logged artifacts directory to AI Foundry")
+                except Exception as e:
+                    self.logger.warning(f"Failed to log artifacts to AI Foundry: {str(e)}")
+
+                for k,v in metrics.items():
+                    eval_run.log_metric(k, v)
+                    self.logger.debug(f"Logged metric: {k} = {v}")
+
+                eval_run.write_properties_to_run_history(properties)
+
+                eval_run._end_run("FINISHED")
 
-
-            scorecard = redteam_result.scan_result["scorecard"]
-            joint_attack_summary = scorecard["joint_risk_attack_summary"]
-
-            if joint_attack_summary:
-                for risk_category_summary in joint_attack_summary:
-                    risk_category = risk_category_summary.get("risk_category").lower()
-                    for key, value in risk_category_summary.items():
-                        if key != "risk_category":
-                            eval_run.log_metric(f"{risk_category}_{key}", cast(float, value))
-                            self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
-        eval_run._end_run("FINISHED")
-        self.logger.info("Successfully logged results to MLFlow")
+        self.logger.info("Successfully logged results to AI Foundry")
         return None
 
     # Using the utility function from strategy_utils.py instead
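In the new code path the metrics are first collected into a dictionary and then either attached to the OneDP evaluation result or logged as MLFlow run metrics. A small sketch of how the scorecard's `joint_risk_attack_summary` is flattened into metric names; only the field names come from the diff above, the example input row is invented:

```python
from typing import Any, Dict, List, cast

def flatten_joint_attack_summary(joint_attack_summary: List[Dict[str, Any]]) -> Dict[str, float]:
    """Flatten [{'risk_category': ..., <metric>: <value>, ...}] into {'<risk>_<metric>': value}."""
    metrics: Dict[str, float] = {}
    for risk_category_summary in joint_attack_summary:
        risk_category = risk_category_summary.get("risk_category").lower()
        for key, value in risk_category_summary.items():
            if key != "risk_category":
                metrics[f"{risk_category}_{key}"] = cast(float, value)
    return metrics

# Invented example row:
print(flatten_joint_attack_summary([{"risk_category": "Violence", "attack_success_rate": 0.12}]))
# {'violence_attack_success_rate': 0.12}
```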
@@ -1993,10 +2049,9 @@ class RedTeam():
         else:
             eval_run = self._start_redteam_mlflow_run(self.azure_ai_project, scan_name)
 
-            self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination, evaluation_id=eval_run.info.run_id)
         # Show URL for tracking progress
         print(f"🔗 Track your red team scan in AI Foundry: {self.ai_studio_url}")
-        self.logger.info(f"Started
+        self.logger.info(f"Started Uploading run: {self.ai_studio_url}")
 
         log_subsection_header(self.logger, "Setting up scan configuration")
         flattened_attack_strategies = self._get_flattened_attack_strategies(attack_strategies)
@@ -2210,7 +2265,7 @@ class RedTeam():
         )
 
         if not skip_upload:
-            self.logger.info("Logging results to
+            self.logger.info("Logging results to AI Foundry")
             await self._log_redteam_results_to_mlflow(
                 redteam_result=output,
                 eval_run=eval_run,
azure/ai/evaluation/simulator/_conversation/constants.py

@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
 
 # Azure endpoint constants
 AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
-COGNITIVE_SERVICES_TOKEN_SCOPE = "https://
+COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
 AZURE_TOKEN_REFRESH_INTERVAL = 600 # seconds
 AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
     r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
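The Cognitive Services token scope constant now points at `https://ai.azure.com/`. How the constant is consumed downstream is not shown in this hunk; the sketch below is only an assumption about acquiring a token for that scope with `azure-identity` (the `.default` suffix is the usual convention, not something this diff establishes):

```python
from azure.identity import DefaultAzureCredential

COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"

# Assumption: a bearer token for the new scope would be requested like this.
credential = DefaultAzureCredential()
token = credential.get_token(COGNITIVE_SERVICES_TOKEN_SCOPE + ".default")
print(token.expires_on)
```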
azure/ai/evaluation/simulator/_direct_attack_simulator.py

@@ -28,9 +28,9 @@ class DirectAttackSimulator:
     Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
     This simulator converses with your AI system using prompts designed to interrupt normal functionality.
 
-    :param azure_ai_project: The
-        name.
-    :type azure_ai_project:
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
azure/ai/evaluation/simulator/_indirect_attack_simulator.py

@@ -30,9 +30,9 @@ class IndirectAttackSimulator(AdversarialSimulator):
     """
     Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
 
-    :param azure_ai_project: The
-        name.
-    :type azure_ai_project:
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
 
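Both `DirectAttackSimulator` and `IndirectAttackSimulator` docstrings now state that `azure_ai_project` accepts either a project endpoint string or an `AzureAIProject` mapping. A hedged usage sketch with a placeholder endpoint (not a real project); a subscription/resource-group/project dictionary should be accepted the same way:

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.simulator import DirectAttackSimulator, IndirectAttackSimulator

# Placeholder endpoint per the updated docstring wording.
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
credential = DefaultAzureCredential()

direct_simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=credential)
indirect_simulator = IndirectAttackSimulator(azure_ai_project=azure_ai_project, credential=credential)
```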
azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py

@@ -12,6 +12,7 @@ from azure.ai.evaluation._common.raiclient import MachineLearningServicesClient
 from azure.ai.evaluation._constants import TokenScope
 from azure.ai.evaluation._common.utils import is_onedp_project
 from azure.ai.evaluation._common.onedp import AIProjectClient
+from azure.ai.evaluation._common import EvaluationServiceOneDPClient
 import jwt
 import time
 import ast
@@ -46,6 +47,7 @@ class GeneratedRAIClient:
             ).rai_svc
         else:
             self._client = AIProjectClient(endpoint=azure_ai_project, credential=token_manager).red_teams
+            self._evaluation_onedp_client = EvaluationServiceOneDPClient(endpoint=azure_ai_project, credential=token_manager)
 
     def _get_service_discovery_url(self):
         """Get the service discovery URL.
azure/ai/evaluation/simulator/_model_tools/_template_handler.py

@@ -146,12 +146,13 @@ class AdversarialTemplate:
 
 class AdversarialTemplateHandler:
     """
-
+    Initialize the AdversarialTemplateHandler.
 
-    :param azure_ai_project: The Azure AI project
-
-    :
-    :
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
+    :param rai_client: The RAI client or AI Project client used for fetching parameters.
+    :type rai_client: Union[~azure.ai.evaluation.simulator._model_tools.RAIClient, ~azure.ai.evaluation._common.onedp._client.AIProjectClient]
     """
 
     def __init__(self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]) -> None:
{azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.6.0
+Version: 1.7.0
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -30,10 +30,11 @@ Requires-Dist: nltk>=3.9.1
 Requires-Dist: azure-storage-blob>=12.10.0
 Requires-Dist: httpx>=0.25.1
 Requires-Dist: pandas<3.0.0,>=2.1.2
-Requires-Dist: openai>=1.
+Requires-Dist: openai>=1.78.0
 Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
 Requires-Dist: msrest>=0.6.21
 Requires-Dist: Jinja2>=3.1.6
+Requires-Dist: aiohttp>=3.0
 Provides-Extra: redteam
 Requires-Dist: pyrit==0.8.1; extra == "redteam"
 
@@ -115,13 +116,23 @@ result = relevance_evaluator(
     response="The capital of Japan is Tokyo."
 )
 
-#
+# There are two ways to provide Azure AI Project.
+# Option #1 : Using Azure AI Project Details
 azure_ai_project = {
     "subscription_id": "<subscription_id>",
     "resource_group_name": "<resource_group_name>",
     "project_name": "<project_name>",
 }
 
+violence_evaluator = ViolenceEvaluator(azure_ai_project)
+result = violence_evaluator(
+    query="What is the capital of France?",
+    response="Paris."
+)
+
+# Option # 2 : Using Azure AI Project Url
+azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
+
 violence_evaluator = ViolenceEvaluator(azure_ai_project)
 result = violence_evaluator(
     query="What is the capital of France?",
@@ -272,11 +283,18 @@ with open("simulator_output.jsonl", "w") as f:
 ```python
 from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
 from azure.identity import DefaultAzureCredential
+
+# There are two ways to provide Azure AI Project.
+# Option #1 : Using Azure AI Project
 azure_ai_project = {
     "subscription_id": <subscription_id>,
     "resource_group_name": <resource_group_name>,
     "project_name": <project_name>
 }
+
+# Option #2 : Using Azure AI Project Url
+azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
+
 scenario = AdversarialScenario.ADVERSARIAL_QA
 simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
 
@@ -382,6 +400,11 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 
 # Release History
 
+## 1.7.0 (2025-05-12)
+
+### Bugs Fixed
+- azure-ai-evaluation failed with module not found [#40992](https://github.com/Azure/azure-sdk-for-python/issues/40992)
+
 ## 1.6.0 (2025-05-07)
 
 ### Features Added