azure_ai_evaluation-1.2.0-py3-none-any.whl → azure_ai_evaluation-1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. azure/ai/evaluation/__init__.py +1 -15
  2. azure/ai/evaluation/_common/utils.py +8 -8
  3. azure/ai/evaluation/_constants.py +3 -0
  4. azure/ai/evaluation/_evaluate/_evaluate.py +5 -2
  5. azure/ai/evaluation/_exceptions.py +0 -1
  6. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  7. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +640 -0
  8. azure/ai/evaluation/_version.py +2 -1
  9. azure/ai/evaluation/simulator/_adversarial_simulator.py +10 -3
  10. azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
  11. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
  12. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
  13. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/METADATA +7 -1
  14. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/RECORD +17 -23
  15. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  16. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  17. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  18. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  19. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  20. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  21. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  22. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  23. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/NOTICE.txt +0 -0
  24. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/WHEEL +0 -0
  25. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/top_level.txt +0 -0
@@ -12,14 +12,6 @@ from ._evaluators._content_safety import (
12
12
  SexualEvaluator,
13
13
  ViolenceEvaluator,
14
14
  )
15
- from ._evaluators._multimodal._content_safety_multimodal import (
16
- ContentSafetyMultimodalEvaluator,
17
- HateUnfairnessMultimodalEvaluator,
18
- SelfHarmMultimodalEvaluator,
19
- SexualMultimodalEvaluator,
20
- ViolenceMultimodalEvaluator,
21
- )
22
- from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
23
15
  from ._evaluators._f1_score import F1ScoreEvaluator
24
16
  from ._evaluators._fluency import FluencyEvaluator
25
17
  from ._evaluators._gleu import GleuScoreEvaluator
@@ -72,11 +64,5 @@ __all__ = [
72
64
  "EvaluatorConfig",
73
65
  "Conversation",
74
66
  "Message",
75
- "EvaluationResult",
76
- "ContentSafetyMultimodalEvaluator",
77
- "HateUnfairnessMultimodalEvaluator",
78
- "SelfHarmMultimodalEvaluator",
79
- "SexualMultimodalEvaluator",
80
- "ViolenceMultimodalEvaluator",
81
- "ProtectedMaterialMultimodalEvaluator",
67
+ "EvaluationResult"
82
68
  ]
@@ -366,7 +366,7 @@ def validate_conversation(conversation):
366
366
  if not isinstance(messages, list):
367
367
  raise_exception(
368
368
  "'messages' parameter must be a JSON-compatible list of chat messages",
369
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
369
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
370
370
  )
371
371
  expected_roles = {"user", "assistant", "system"}
372
372
  image_found = False
@@ -393,7 +393,7 @@ def validate_conversation(conversation):
393
393
  ):
394
394
  raise_exception(
395
395
  f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
396
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
396
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
397
397
  )
398
398
  if isinstance(message, AssistantMessage):
399
399
  assistant_message_count += 1
@@ -407,7 +407,7 @@ def validate_conversation(conversation):
407
407
  if message.get("role") not in expected_roles:
408
408
  raise_exception(
409
409
  f"Invalid role provided: {message.get('role')}. Message number: {num}",
410
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
410
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
411
411
  )
412
412
  if message.get("role") == "assistant":
413
413
  assistant_message_count += 1
@@ -417,7 +417,7 @@ def validate_conversation(conversation):
417
417
  if not isinstance(content, (str, list)):
418
418
  raise_exception(
419
419
  f"Content in each turn must be a string or array. Message number: {num}",
420
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
420
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
421
421
  )
422
422
  if isinstance(content, list):
423
423
  if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
@@ -425,21 +425,21 @@ def validate_conversation(conversation):
425
425
  if not image_found:
426
426
  raise_exception(
427
427
  "Message needs to have multi-modal input like images.",
428
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
428
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
429
429
  )
430
430
  if assistant_message_count == 0:
431
431
  raise_exception(
432
432
  "Assistant role required in one of the messages.",
433
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
433
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
434
434
  )
435
435
  if user_message_count == 0:
436
436
  raise_exception(
437
437
  "User role required in one of the messages.",
438
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
438
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
439
439
  )
440
440
  if assistant_message_count > 1:
441
441
  raise_exception(
442
442
  "Evaluators for multimodal conversations only support single turn. "
443
443
  "User and assistant role expected as the only role in each message.",
444
- ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
444
+ ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
445
445
  )
@@ -22,6 +22,9 @@ class EvaluationMetrics:
22
22
  SELF_HARM = "self_harm"
23
23
  SEXUAL = "sexual"
24
24
  PROTECTED_MATERIAL = "protected_material"
25
+ ARTWORK = "artwork"
26
+ FICTIONAL_CHARACTERS = "fictional_characters"
27
+ LOGOS_AND_BRANDS = "logos_and_brands"
25
28
  XPIA = "xpia"
26
29
 
27
30
 
@@ -147,6 +147,9 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[s
147
147
  """
148
148
  handled_metrics = [
149
149
  EvaluationMetrics.PROTECTED_MATERIAL,
150
+ EvaluationMetrics.FICTIONAL_CHARACTERS,
151
+ EvaluationMetrics.ARTWORK,
152
+ EvaluationMetrics.LOGOS_AND_BRANDS,
150
153
  _InternalEvaluationMetrics.ECI,
151
154
  EvaluationMetrics.XPIA,
152
155
  ]
@@ -525,7 +528,7 @@ def _process_column_mappings(
525
528
 
526
529
  processed_config: Dict[str, Dict[str, str]] = {}
527
530
 
528
- unexpected_references = re.compile(r"\${(?!target\.|data\.).+?}")
531
+ expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-Z_]+\}$")
529
532
 
530
533
  if column_mapping:
531
534
  for evaluator, mapping_config in column_mapping.items():
@@ -534,7 +537,7 @@ def _process_column_mappings(
534
537
 
535
538
  for map_to_key, map_value in mapping_config.items():
536
539
  # Check if there's any unexpected reference other than ${target.} or ${data.}
537
- if unexpected_references.search(map_value):
540
+ if not expected_references.search(map_value):
538
541
  msg = "Unexpected references detected in 'column_mapping'. Ensure only ${target.} and ${data.} are used."
539
542
  raise EvaluationException(
540
543
  message=msg,
@@ -63,7 +63,6 @@ class ErrorTarget(Enum):
63
63
  RAI_CLIENT = "RAIClient"
64
64
  COHERENCE_EVALUATOR = "CoherenceEvaluator"
65
65
  CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
66
- CONTENT_SAFETY_MULTIMODAL_EVALUATOR = "ContentSafetyMultimodalEvaluator"
67
66
  ECI_EVALUATOR = "ECIEvaluator"
68
67
  F1_EVALUATOR = "F1Evaluator"
69
68
  GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"
@@ -0,0 +1,3 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------