azure-ai-evaluation 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in the public registry. It is provided for informational purposes only.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic.

Files changed (55)
  1. azure/ai/evaluation/__init__.py +1 -0
  2. azure/ai/evaluation/_aoai/aoai_grader.py +1 -1
  3. azure/ai/evaluation/_aoai/label_grader.py +2 -2
  4. azure/ai/evaluation/_aoai/string_check_grader.py +2 -2
  5. azure/ai/evaluation/_aoai/text_similarity_grader.py +2 -2
  6. azure/ai/evaluation/_common/__init__.py +3 -1
  7. azure/ai/evaluation/_common/evaluation_onedp_client.py +50 -5
  8. azure/ai/evaluation/_common/onedp/operations/_operations.py +1 -1
  9. azure/ai/evaluation/_common/rai_service.py +7 -6
  10. azure/ai/evaluation/_converters/_ai_services.py +162 -118
  11. azure/ai/evaluation/_converters/_models.py +76 -6
  12. azure/ai/evaluation/_eval_mapping.py +2 -0
  13. azure/ai/evaluation/_evaluate/_evaluate.py +11 -13
  14. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +24 -5
  15. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
  16. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
  17. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
  18. azure/ai/evaluation/_evaluators/_common/_base_eval.py +4 -0
  19. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
  20. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
  21. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
  22. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
  23. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
  24. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +31 -29
  25. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
  26. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +10 -0
  27. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
  28. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +10 -0
  29. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +10 -0
  30. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
  31. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
  32. azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
  33. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +10 -0
  34. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +13 -0
  35. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
  36. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
  37. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
  38. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +10 -0
  39. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -0
  40. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +80 -10
  41. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
  42. azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
  43. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +26 -7
  44. azure/ai/evaluation/_version.py +1 -1
  45. azure/ai/evaluation/red_team/_red_team.py +183 -128
  46. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  47. azure/ai/evaluation/simulator/_direct_attack_simulator.py +3 -3
  48. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +3 -3
  49. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +2 -0
  50. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +6 -5
  51. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +26 -3
  52. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +55 -55
  53. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
  54. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
  55. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0
@@ -39,6 +39,7 @@ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._common.math import list_mean_nan_safe, is_none_or_nan
 from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
 from azure.ai.evaluation import evaluate
+from azure.ai.evaluation._common import RedTeamUpload, ResultType
 
 # Azure Core imports
 from azure.core.credentials import TokenCredential
@@ -77,7 +78,7 @@ from ._utils.logging_utils import (
 )
 
 @experimental
-class RedTeam():
+class RedTeam:
     """
     This class uses various attack strategies to test the robustness of AI models against adversarial inputs.
     It logs the results of these evaluations and provides detailed scorecards summarizing the attack success rates.
@@ -215,11 +216,12 @@ class RedTeam():
         self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
         self.credential = credential
         self.output_dir = output_dir
-
+        self._one_dp_project = is_onedp_project(azure_ai_project)
+
         # Initialize logger without output directory (will be updated during scan)
         self.logger = setup_logger()
 
-        if not is_onedp_project(azure_ai_project):
+        if not self._one_dp_project:
            self.token_manager = ManagedIdentityAPITokenManager(
                token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
                logger=logging.getLogger("RedTeamLogger"),
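For context on the constructor change above, here is a minimal, hedged sketch of the two `azure_ai_project` forms that the 1.7.0 `RedTeam` distinguishes via `is_onedp_project()`. The import path and the bare two-argument construction are assumptions based on the package's README examples elsewhere in this diff, not on code shown in this hunk:

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.red_team import RedTeam  # assumed public import path

credential = DefaultAzureCredential()

# Option 1: hub-based project details (dict) -> handled via the MLFlow/EvalRun path.
azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}

# Option 2: Foundry project endpoint (string) -> is_onedp_project() returns True,
# so results are uploaded through the new one-DP client instead of MLFlow.
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"

red_team = RedTeam(azure_ai_project=azure_ai_project, credential=credential)
```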
@@ -276,52 +278,67 @@ class RedTeam():
         :raises EvaluationException: If no azure_ai_project is provided or trace destination cannot be determined
         """
         if not azure_ai_project:
-            log_error(self.logger, "No azure_ai_project provided, cannot start MLFlow run")
+            log_error(self.logger, "No azure_ai_project provided, cannot upload run")
             raise EvaluationException(
                 message="No azure_ai_project provided",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.MISSING_FIELD,
                 target=ErrorTarget.RED_TEAM
             )
-
-        trace_destination = _trace_destination_from_project_scope(azure_ai_project)
-        if not trace_destination:
-            self.logger.warning("Could not determine trace destination from project scope")
-            raise EvaluationException(
-                message="Could not determine trace destination",
-                blame=ErrorBlame.SYSTEM_ERROR,
-                category=ErrorCategory.UNKNOWN,
-                target=ErrorTarget.RED_TEAM
+
+        if self._one_dp_project:
+            response = self.generated_rai_client._evaluation_onedp_client.start_red_team_run(
+                red_team=RedTeamUpload(
+                    scan_name=run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
+                )
             )
-
-        ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
-
-        management_client = LiteMLClient(
-            subscription_id=ws_triad.subscription_id,
-            resource_group=ws_triad.resource_group_name,
-            logger=self.logger,
-            credential=azure_ai_project.get("credential")
-        )
-
-        tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
-
-        run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
-        self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
-        eval_run = EvalRun(
-            run_name=run_display_name,
-            tracking_uri=cast(str, tracking_uri),
-            subscription_id=ws_triad.subscription_id,
-            group_name=ws_triad.resource_group_name,
-            workspace_name=ws_triad.workspace_name,
-            management_client=management_client, # type: ignore
-        )
-        eval_run._start_run()
-        self.logger.debug(f"MLFlow run started successfully with ID: {eval_run.info.run_id}")
 
-        self.trace_destination = trace_destination
-        self.logger.debug(f"MLFlow run created successfully with ID: {eval_run}")
-
-        return eval_run
+            self.ai_studio_url = response.properties.get("AiStudioEvaluationUri")
+
+            return response
+
+        else:
+            trace_destination = _trace_destination_from_project_scope(azure_ai_project)
+            if not trace_destination:
+                self.logger.warning("Could not determine trace destination from project scope")
+                raise EvaluationException(
+                    message="Could not determine trace destination",
+                    blame=ErrorBlame.SYSTEM_ERROR,
+                    category=ErrorCategory.UNKNOWN,
+                    target=ErrorTarget.RED_TEAM
+                )
+
+            ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
+
+            management_client = LiteMLClient(
+                subscription_id=ws_triad.subscription_id,
+                resource_group=ws_triad.resource_group_name,
+                logger=self.logger,
+                credential=azure_ai_project.get("credential")
+            )
+
+            tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
+
+            run_display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+            self.logger.debug(f"Starting MLFlow run with name: {run_display_name}")
+            eval_run = EvalRun(
+                run_name=run_display_name,
+                tracking_uri=cast(str, tracking_uri),
+                subscription_id=ws_triad.subscription_id,
+                group_name=ws_triad.resource_group_name,
+                workspace_name=ws_triad.workspace_name,
+                management_client=management_client, # type: ignore
+            )
+            eval_run._start_run()
+            self.logger.debug(f"MLFlow run started successfully with ID: {eval_run.info.run_id}")
+
+            self.trace_destination = trace_destination
+            self.logger.debug(f"MLFlow run created successfully with ID: {eval_run}")
+
+            self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination,
+                                                    evaluation_id=eval_run.info.run_id)
+
+            return eval_run
 
 
     async def _log_redteam_results_to_mlflow(
@@ -343,58 +360,59 @@ class RedTeam():
         """
         self.logger.debug(f"Logging results to MLFlow, _skip_evals={_skip_evals}")
         artifact_name = "instance_results.json"
+        eval_info_name = "redteam_info.json"
+        properties = {}
 
         # If we have a scan output directory, save the results there first
-        if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
-            artifact_path = os.path.join(self.scan_output_dir, artifact_name)
-            self.logger.debug(f"Saving artifact to scan output directory: {artifact_path}")
-            with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                if _skip_evals:
-                    # In _skip_evals mode, we write the conversations in conversation/messages format
-                    f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
-                elif redteam_result.scan_result:
-                    # Create a copy to avoid modifying the original scan result
-                    result_with_conversations = redteam_result.scan_result.copy() if isinstance(redteam_result.scan_result, dict) else {}
-
-                    # Preserve all original fields needed for scorecard generation
-                    result_with_conversations["scorecard"] = result_with_conversations.get("scorecard", {})
-                    result_with_conversations["parameters"] = result_with_conversations.get("parameters", {})
-
-                    # Add conversations field with all conversation data including user messages
-                    result_with_conversations["conversations"] = redteam_result.attack_details or []
-
-                    # Keep original attack_details field to preserve compatibility with existing code
-                    if "attack_details" not in result_with_conversations and redteam_result.attack_details is not None:
-                        result_with_conversations["attack_details"] = redteam_result.attack_details
-
-                    json.dump(result_with_conversations, f)
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmpdir:
+            if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                artifact_path = os.path.join(self.scan_output_dir, artifact_name)
+                self.logger.debug(f"Saving artifact to scan output directory: {artifact_path}")
+                with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                    if _skip_evals:
+                        # In _skip_evals mode, we write the conversations in conversation/messages format
+                        f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
+                    elif redteam_result.scan_result:
+                        # Create a copy to avoid modifying the original scan result
+                        result_with_conversations = redteam_result.scan_result.copy() if isinstance(redteam_result.scan_result, dict) else {}
+
+                        # Preserve all original fields needed for scorecard generation
+                        result_with_conversations["scorecard"] = result_with_conversations.get("scorecard", {})
+                        result_with_conversations["parameters"] = result_with_conversations.get("parameters", {})
+
+                        # Add conversations field with all conversation data including user messages
+                        result_with_conversations["conversations"] = redteam_result.attack_details or []
+
+                        # Keep original attack_details field to preserve compatibility with existing code
+                        if "attack_details" not in result_with_conversations and redteam_result.attack_details is not None:
+                            result_with_conversations["attack_details"] = redteam_result.attack_details
+
+                        json.dump(result_with_conversations, f)
 
-            eval_info_name = "redteam_info.json"
-            eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
-            self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
-            with open(eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                # Remove evaluation_result from red_team_info before logging
-                red_team_info_logged = {}
-                for strategy, harms_dict in self.red_team_info.items():
-                    red_team_info_logged[strategy] = {}
-                    for harm, info_dict in harms_dict.items():
-                        info_dict.pop("evaluation_result", None)
-                        red_team_info_logged[strategy][harm] = info_dict
-                f.write(json.dumps(red_team_info_logged))
-
-            # Also save a human-readable scorecard if available
-            if not _skip_evals and redteam_result.scan_result:
-                scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
-                with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                    f.write(self._to_scorecard(redteam_result.scan_result))
-                self.logger.debug(f"Saved scorecard to: {scorecard_path}")
+                eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
+                self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
+                with open(eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                    # Remove evaluation_result from red_team_info before logging
+                    red_team_info_logged = {}
+                    for strategy, harms_dict in self.red_team_info.items():
+                        red_team_info_logged[strategy] = {}
+                        for harm, info_dict in harms_dict.items():
+                            info_dict.pop("evaluation_result", None)
+                            red_team_info_logged[strategy][harm] = info_dict
+                    f.write(json.dumps(red_team_info_logged))
 
-            # Create a dedicated artifacts directory with proper structure for MLFlow
-            # MLFlow requires the artifact_name file to be in the directory we're logging
-
-            import tempfile
-            with tempfile.TemporaryDirectory() as tmpdir:
-                # First, create the main artifact file that MLFlow expects
+                # Also save a human-readable scorecard if available
+                if not _skip_evals and redteam_result.scan_result:
+                    scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
+                    with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+                        f.write(self._to_scorecard(redteam_result.scan_result))
+                    self.logger.debug(f"Saved scorecard to: {scorecard_path}")
+
+            # Create a dedicated artifacts directory with proper structure for MLFlow
+            # MLFlow requires the artifact_name file to be in the directory we're logging
+
+            # First, create the main artifact file that MLFlow expects
             with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
                 if _skip_evals:
                     f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
@@ -425,51 +443,89 @@ class RedTeam():
                     self.logger.warning(f"Failed to copy file {file} to artifact directory: {str(e)}")
 
                # Log the entire directory to MLFlow
-                try:
-                    eval_run.log_artifact(tmpdir, artifact_name)
-                    eval_run.log_artifact(tmpdir, eval_info_name)
-                    self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
-                except Exception as e:
-                    self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
-
-                # Also log a direct property to capture the scan output directory
-                try:
-                    eval_run.write_properties_to_run_history({"scan_output_dir": str(self.scan_output_dir)})
-                    self.logger.debug("Logged scan_output_dir property to MLFlow")
-                except Exception as e:
-                    self.logger.warning(f"Failed to log scan_output_dir property to MLFlow: {str(e)}")
-        else:
-            # Use temporary directory as before if no scan output directory exists
-            with tempfile.TemporaryDirectory() as tmpdir:
+                # try:
+                #     eval_run.log_artifact(tmpdir, artifact_name)
+                #     eval_run.log_artifact(tmpdir, eval_info_name)
+                #     self.logger.debug(f"Successfully logged artifacts directory to MLFlow")
+                # except Exception as e:
+                #     self.logger.warning(f"Failed to log artifacts to MLFlow: {str(e)}")
+
+                properties.update({"scan_output_dir": str(self.scan_output_dir)})
+            else:
+                # Use temporary directory as before if no scan output directory exists
                 artifact_file = Path(tmpdir) / artifact_name
                 with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
                     if _skip_evals:
                         f.write(json.dumps({"conversations": redteam_result.attack_details or []}))
                     elif redteam_result.scan_result:
                         json.dump(redteam_result.scan_result, f)
-                eval_run.log_artifact(tmpdir, artifact_name)
+                # eval_run.log_artifact(tmpdir, artifact_name)
                 self.logger.debug(f"Logged artifact: {artifact_name}")
 
-        eval_run.write_properties_to_run_history({
-            EvaluationRunProperties.RUN_TYPE: "eval_run",
-            "redteaming": "asr", # Red team agent specific run properties to help UI identify this as a redteaming run
-            EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
-            "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
-        })
+            properties.update({
+                "redteaming": "asr", # Red team agent specific run properties to help UI identify this as a redteaming run
+                EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
+            })
+
+            metrics = {}
+            if redteam_result.scan_result:
+                scorecard = redteam_result.scan_result["scorecard"]
+                joint_attack_summary = scorecard["joint_risk_attack_summary"]
+
+                if joint_attack_summary:
+                    for risk_category_summary in joint_attack_summary:
+                        risk_category = risk_category_summary.get("risk_category").lower()
+                        for key, value in risk_category_summary.items():
+                            if key != "risk_category":
+                                metrics.update({
+                                    f"{risk_category}_{key}": cast(float, value)
+                                })
+                                # eval_run.log_metric(f"{risk_category}_{key}", cast(float, value))
+                                self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
+
+            if self._one_dp_project:
+                try:
+                    create_evaluation_result_response = self.generated_rai_client._evaluation_onedp_client.create_evaluation_result(
+                        name=uuid.uuid4(),
+                        path=tmpdir,
+                        metrics=metrics,
+                        result_type=ResultType.REDTEAM
+                    )
+
+                    update_run_response = self.generated_rai_client._evaluation_onedp_client.update_red_team_run(
+                        name=eval_run.id,
+                        red_team=RedTeamUpload(
+                            id=eval_run.id,
+                            scan_name=eval_run.scan_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
+                            status="Completed",
+                            outputs={
+                                'evaluationResultId': create_evaluation_result_response.id,
+                            },
+                            properties=properties,
+                        )
+                    )
+                    self.logger.debug(f"Updated UploadRun: {update_run_response.id}")
+                except Exception as e:
+                    self.logger.warning(f"Failed to upload red team results to AI Foundry: {str(e)}")
+            else:
+                # Log the entire directory to MLFlow
+                try:
+                    eval_run.log_artifact(tmpdir, artifact_name)
+                    if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+                        eval_run.log_artifact(tmpdir, eval_info_name)
+                    self.logger.debug(f"Successfully logged artifacts directory to AI Foundry")
+                except Exception as e:
+                    self.logger.warning(f"Failed to log artifacts to AI Foundry: {str(e)}")
+
+                for k,v in metrics.items():
+                    eval_run.log_metric(k, v)
+                    self.logger.debug(f"Logged metric: {k} = {v}")
+
+                eval_run.write_properties_to_run_history(properties)
+
+                eval_run._end_run("FINISHED")
 
-        if redteam_result.scan_result:
-            scorecard = redteam_result.scan_result["scorecard"]
-            joint_attack_summary = scorecard["joint_risk_attack_summary"]
-
-            if joint_attack_summary:
-                for risk_category_summary in joint_attack_summary:
-                    risk_category = risk_category_summary.get("risk_category").lower()
-                    for key, value in risk_category_summary.items():
-                        if key != "risk_category":
-                            eval_run.log_metric(f"{risk_category}_{key}", cast(float, value))
-                            self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
-        eval_run._end_run("FINISHED")
-        self.logger.info("Successfully logged results to MLFlow")
+        self.logger.info("Successfully logged results to AI Foundry")
         return None
 
     # Using the utility function from strategy_utils.py instead
@@ -1993,10 +2049,9 @@ class RedTeam():
            else:
                eval_run = self._start_redteam_mlflow_run(self.azure_ai_project, scan_name)
 
-            self.ai_studio_url = _get_ai_studio_url(trace_destination=self.trace_destination, evaluation_id=eval_run.info.run_id)
            # Show URL for tracking progress
            print(f"🔗 Track your red team scan in AI Foundry: {self.ai_studio_url}")
-            self.logger.info(f"Started MLFlow run: {self.ai_studio_url}")
+            self.logger.info(f"Started Uploading run: {self.ai_studio_url}")
 
            log_subsection_header(self.logger, "Setting up scan configuration")
            flattened_attack_strategies = self._get_flattened_attack_strategies(attack_strategies)
@@ -2210,7 +2265,7 @@ class RedTeam():
                )
 
            if not skip_upload:
-                self.logger.info("Logging results to MLFlow")
+                self.logger.info("Logging results to AI Foundry")
                await self._log_redteam_results_to_mlflow(
                    redteam_result=output,
                    eval_run=eval_run,
@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
 
 # Azure endpoint constants
 AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
-COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"
+COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
 AZURE_TOKEN_REFRESH_INTERVAL = 600 # seconds
 AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
     r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
@@ -28,9 +28,9 @@ class DirectAttackSimulator:
     Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
     This simulator converses with your AI system using prompts designed to interrupt normal functionality.
 
-    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
-        name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
 
@@ -30,9 +30,9 @@ class IndirectAttackSimulator(AdversarialSimulator):
    """
    Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
 
-    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
-        name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
    :param credential: The credential for connecting to Azure AI project.
    :type credential: ~azure.core.credentials.TokenCredential
 
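Both simulator docstrings above now accept the widened parameter; a short hedged usage sketch follows (the endpoint pattern is copied from the README section later in this diff, and only the `azure_ai_project`/`credential` keywords documented in the docstrings are relied on):

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.simulator import DirectAttackSimulator, IndirectAttackSimulator

credential = DefaultAzureCredential()

# Either form is now accepted for azure_ai_project:
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
# azure_ai_project = {
#     "subscription_id": "<subscription_id>",
#     "resource_group_name": "<resource_group_name>",
#     "project_name": "<project_name>",
# }

direct_simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=credential)
indirect_simulator = IndirectAttackSimulator(azure_ai_project=azure_ai_project, credential=credential)
```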
@@ -12,6 +12,7 @@ from azure.ai.evaluation._common.raiclient import MachineLearningServicesClient
 from azure.ai.evaluation._constants import TokenScope
 from azure.ai.evaluation._common.utils import is_onedp_project
 from azure.ai.evaluation._common.onedp import AIProjectClient
+from azure.ai.evaluation._common import EvaluationServiceOneDPClient
 import jwt
 import time
 import ast
@@ -46,6 +47,7 @@ class GeneratedRAIClient:
            ).rai_svc
        else:
            self._client = AIProjectClient(endpoint=azure_ai_project, credential=token_manager).red_teams
+            self._evaluation_onedp_client = EvaluationServiceOneDPClient(endpoint=azure_ai_project, credential=token_manager)
 
    def _get_service_discovery_url(self):
        """Get the service discovery URL.
@@ -146,12 +146,13 @@ class AdversarialTemplate:
 
 
 class AdversarialTemplateHandler:
    """
-    Adversarial template handler constructor.
+    Initialize the AdversarialTemplateHandler.
 
-    :param azure_ai_project: The Azure AI project.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param rai_client: The RAI client.
-    :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
+    :param rai_client: The RAI client or AI Project client used for fetching parameters.
+    :type rai_client: Union[~azure.ai.evaluation.simulator._model_tools.RAIClient, ~azure.ai.evaluation._common.onedp._client.AIProjectClient]
    """
 
    def __init__(self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]) -> None:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.6.0
+Version: 1.7.0
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -30,10 +30,11 @@ Requires-Dist: nltk>=3.9.1
 Requires-Dist: azure-storage-blob>=12.10.0
 Requires-Dist: httpx>=0.25.1
 Requires-Dist: pandas<3.0.0,>=2.1.2
-Requires-Dist: openai>=1.73.0
+Requires-Dist: openai>=1.78.0
 Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
 Requires-Dist: msrest>=0.6.21
 Requires-Dist: Jinja2>=3.1.6
+Requires-Dist: aiohttp>=3.0
 Provides-Extra: redteam
 Requires-Dist: pyrit==0.8.1; extra == "redteam"
 
@@ -115,13 +116,23 @@ result = relevance_evaluator(
     response="The capital of Japan is Tokyo."
 )
 
-# AI assisted safety evaluator
+# There are two ways to provide Azure AI Project.
+# Option #1 : Using Azure AI Project Details
 azure_ai_project = {
     "subscription_id": "<subscription_id>",
     "resource_group_name": "<resource_group_name>",
     "project_name": "<project_name>",
 }
 
+violence_evaluator = ViolenceEvaluator(azure_ai_project)
+result = violence_evaluator(
+    query="What is the capital of France?",
+    response="Paris."
+)
+
+# Option # 2 : Using Azure AI Project Url
+azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
+
 violence_evaluator = ViolenceEvaluator(azure_ai_project)
 result = violence_evaluator(
     query="What is the capital of France?",
@@ -272,11 +283,18 @@ with open("simulator_output.jsonl", "w") as f:
 ```python
 from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
 from azure.identity import DefaultAzureCredential
+
+# There are two ways to provide Azure AI Project.
+# Option #1 : Using Azure AI Project
 azure_ai_project = {
     "subscription_id": <subscription_id>,
     "resource_group_name": <resource_group_name>,
     "project_name": <project_name>
 }
+
+# Option #2 : Using Azure AI Project Url
+azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
+
 scenario = AdversarialScenario.ADVERSARIAL_QA
 simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
 
@@ -382,6 +400,11 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 
 # Release History
 
+## 1.7.0 (2025-05-12)
+
+### Bugs Fixed
+- azure-ai-evaluation failed with module not found [#40992](https://github.com/Azure/azure-sdk-for-python/issues/40992)
+
 ## 1.6.0 (2025-05-07)
 
 ### Features Added