azure-ai-evaluation 1.0.0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff compares the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (108)
  1. azure/ai/evaluation/__init__.py +4 -26
  2. azure/ai/evaluation/_common/constants.py +2 -9
  3. azure/ai/evaluation/_common/rai_service.py +122 -302
  4. azure/ai/evaluation/_common/utils.py +35 -393
  5. azure/ai/evaluation/_constants.py +6 -28
  6. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/__init__.py +2 -3
  7. azure/ai/evaluation/_evaluate/{_batch_run/eval_run_context.py → _batch_run_client/batch_run_context.py} +8 -25
  8. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py +30 -68
  9. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +61 -0
  10. azure/ai/evaluation/_evaluate/_eval_run.py +40 -117
  11. azure/ai/evaluation/_evaluate/_evaluate.py +255 -416
  12. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +19 -24
  13. azure/ai/evaluation/_evaluate/_utils.py +47 -108
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +19 -18
  15. azure/ai/evaluation/_evaluators/{_retrieval → _chat}/__init__.py +2 -2
  16. azure/ai/evaluation/_evaluators/_chat/_chat.py +350 -0
  17. azure/ai/evaluation/_evaluators/{_service_groundedness → _chat/retrieval}/__init__.py +2 -2
  18. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +163 -0
  19. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
  20. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +93 -78
  21. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +39 -76
  22. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +4 -0
  23. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +68 -104
  24. azure/ai/evaluation/_evaluators/{_multimodal/_content_safety_multimodal_base.py → _content_safety/_content_safety_base.py} +35 -24
  25. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +296 -0
  26. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +54 -105
  27. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +52 -99
  28. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +52 -101
  29. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +51 -101
  30. azure/ai/evaluation/_evaluators/_eci/_eci.py +55 -45
  31. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -36
  32. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +94 -76
  33. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +41 -66
  34. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +17 -15
  35. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +92 -113
  36. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
  37. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +27 -21
  38. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +80 -89
  39. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
  40. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
  41. azure/ai/evaluation/_evaluators/_qa/_qa.py +43 -25
  42. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +101 -84
  43. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +47 -78
  44. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +27 -27
  45. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +45 -55
  46. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +5 -0
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +106 -91
  48. azure/ai/evaluation/_exceptions.py +7 -28
  49. azure/ai/evaluation/_http_utils.py +134 -205
  50. azure/ai/evaluation/_model_configurations.py +8 -104
  51. azure/ai/evaluation/_version.py +1 -1
  52. azure/ai/evaluation/simulator/__init__.py +2 -3
  53. azure/ai/evaluation/simulator/_adversarial_scenario.py +1 -20
  54. azure/ai/evaluation/simulator/_adversarial_simulator.py +95 -116
  55. azure/ai/evaluation/simulator/_constants.py +1 -11
  56. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -14
  57. azure/ai/evaluation/simulator/_conversation/_conversation.py +20 -20
  58. azure/ai/evaluation/simulator/_direct_attack_simulator.py +68 -34
  59. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -1
  60. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +28 -31
  61. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +95 -108
  62. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +22 -70
  63. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +14 -30
  64. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +14 -25
  65. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +24 -68
  66. azure/ai/evaluation/simulator/_model_tools/models.py +21 -19
  67. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +10 -6
  68. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +5 -6
  69. azure/ai/evaluation/simulator/_tracing.py +28 -25
  70. azure/ai/evaluation/simulator/_utils.py +13 -34
  71. azure/ai/evaluation/simulator/simulator.py +579 -0
  72. azure_ai_evaluation-1.0.0b1.dist-info/METADATA +377 -0
  73. azure_ai_evaluation-1.0.0b1.dist-info/RECORD +97 -0
  74. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b1.dist-info}/WHEEL +1 -1
  75. azure/ai/evaluation/_common/_experimental.py +0 -172
  76. azure/ai/evaluation/_common/math.py +0 -89
  77. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -99
  78. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -46
  79. azure/ai/evaluation/_evaluators/_common/__init__.py +0 -13
  80. azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -344
  81. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -88
  82. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -133
  83. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -113
  84. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -99
  85. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  86. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  87. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  88. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  89. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  90. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  91. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  92. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -112
  93. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -93
  94. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -148
  95. azure/ai/evaluation/_vendor/__init__.py +0 -3
  96. azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -14
  97. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -328
  98. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -63
  99. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -63
  100. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -53
  101. azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  102. azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -1150
  103. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  104. azure/ai/evaluation/simulator/_simulator.py +0 -716
  105. azure_ai_evaluation-1.0.0.dist-info/METADATA +0 -595
  106. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +0 -70
  107. azure_ai_evaluation-1.0.0.dist-info/RECORD +0 -119
  108. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b1.dist-info}/top_level.txt +0 -0
@@ -4,42 +4,30 @@
4
4
 
5
5
  from ._evaluate._evaluate import evaluate
6
6
  from ._evaluators._bleu import BleuScoreEvaluator
7
+ from ._evaluators._chat import ChatEvaluator
7
8
  from ._evaluators._coherence import CoherenceEvaluator
8
9
  from ._evaluators._content_safety import (
10
+ ContentSafetyChatEvaluator,
9
11
  ContentSafetyEvaluator,
10
12
  HateUnfairnessEvaluator,
11
13
  SelfHarmEvaluator,
12
14
  SexualEvaluator,
13
15
  ViolenceEvaluator,
14
16
  )
15
- from ._evaluators._multimodal._content_safety_multimodal import (
16
- ContentSafetyMultimodalEvaluator,
17
- HateUnfairnessMultimodalEvaluator,
18
- SelfHarmMultimodalEvaluator,
19
- SexualMultimodalEvaluator,
20
- ViolenceMultimodalEvaluator,
21
- )
22
- from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
23
17
  from ._evaluators._f1_score import F1ScoreEvaluator
24
18
  from ._evaluators._fluency import FluencyEvaluator
25
19
  from ._evaluators._gleu import GleuScoreEvaluator
26
20
  from ._evaluators._groundedness import GroundednessEvaluator
27
- from ._evaluators._service_groundedness import GroundednessProEvaluator
28
21
  from ._evaluators._meteor import MeteorScoreEvaluator
29
22
  from ._evaluators._protected_material import ProtectedMaterialEvaluator
30
23
  from ._evaluators._qa import QAEvaluator
31
24
  from ._evaluators._relevance import RelevanceEvaluator
32
- from ._evaluators._retrieval import RetrievalEvaluator
33
25
  from ._evaluators._rouge import RougeScoreEvaluator, RougeType
34
26
  from ._evaluators._similarity import SimilarityEvaluator
35
27
  from ._evaluators._xpia import IndirectAttackEvaluator
36
28
  from ._model_configurations import (
37
29
  AzureAIProject,
38
30
  AzureOpenAIModelConfiguration,
39
- Conversation,
40
- EvaluationResult,
41
- EvaluatorConfig,
42
- Message,
43
31
  OpenAIModelConfiguration,
44
32
  )
45
33
 
@@ -49,34 +37,24 @@ __all__ = [
49
37
  "F1ScoreEvaluator",
50
38
  "FluencyEvaluator",
51
39
  "GroundednessEvaluator",
52
- "GroundednessProEvaluator",
53
40
  "RelevanceEvaluator",
54
41
  "SimilarityEvaluator",
55
42
  "QAEvaluator",
43
+ "ChatEvaluator",
56
44
  "ViolenceEvaluator",
57
45
  "SexualEvaluator",
58
46
  "SelfHarmEvaluator",
59
47
  "HateUnfairnessEvaluator",
60
48
  "ContentSafetyEvaluator",
49
+ "ContentSafetyChatEvaluator",
61
50
  "IndirectAttackEvaluator",
62
51
  "BleuScoreEvaluator",
63
52
  "GleuScoreEvaluator",
64
53
  "MeteorScoreEvaluator",
65
- "RetrievalEvaluator",
66
54
  "RougeScoreEvaluator",
67
55
  "RougeType",
68
56
  "ProtectedMaterialEvaluator",
69
57
  "AzureAIProject",
70
58
  "AzureOpenAIModelConfiguration",
71
59
  "OpenAIModelConfiguration",
72
- "EvaluatorConfig",
73
- "Conversation",
74
- "Message",
75
- "EvaluationResult",
76
- "ContentSafetyMultimodalEvaluator",
77
- "HateUnfairnessMultimodalEvaluator",
78
- "SelfHarmMultimodalEvaluator",
79
- "SexualMultimodalEvaluator",
80
- "ViolenceMultimodalEvaluator",
81
- "ProtectedMaterialMultimodalEvaluator",
82
60
  ]
@@ -3,11 +3,6 @@
3
3
  # ---------------------------------------------------------
4
4
  from enum import Enum
5
5
 
6
- from azure.core import CaseInsensitiveEnumMeta
7
-
8
-
9
- PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency"]
10
-
11
6
 
12
7
  class CommonConstants:
13
8
  """Define common constants."""
@@ -38,7 +33,6 @@ class Tasks:
38
33
  CONTENT_HARM = "content harm"
39
34
  PROTECTED_MATERIAL = "protected material"
40
35
  XPIA = "xpia"
41
- GROUNDEDNESS = "groundedness"
42
36
 
43
37
 
44
38
  class _InternalAnnotationTasks:
@@ -49,7 +43,7 @@ class _InternalAnnotationTasks:
49
43
  ECI = "eci"
50
44
 
51
45
 
52
- class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
46
+ class EvaluationMetrics:
53
47
  """Evaluation metrics to aid the RAI service in determining what
54
48
  metrics to request, and how to present them back to the user."""
55
49
 
@@ -60,10 +54,9 @@ class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
60
54
  SEXUAL = "sexual"
61
55
  PROTECTED_MATERIAL = "protected_material"
62
56
  XPIA = "xpia"
63
- GROUNDEDNESS = "generic_groundedness"
64
57
 
65
58
 
66
- class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
59
+ class _InternalEvaluationMetrics:
67
60
  """Evaluation metrics that are not publicly supported.
68
61
  These metrics are experimental and subject to potential change or migration to the main
69
62
  enum over time.