azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. azure/ai/evaluation/__init__.py +82 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/_experimental.py +172 -0
  4. azure/ai/evaluation/_common/constants.py +72 -0
  5. azure/ai/evaluation/_common/math.py +89 -0
  6. azure/ai/evaluation/_common/rai_service.py +632 -0
  7. azure/ai/evaluation/_common/utils.py +445 -0
  8. azure/ai/evaluation/_constants.py +72 -0
  9. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
  12. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
  13. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
  14. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  15. azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
  16. azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
  17. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
  18. azure/ai/evaluation/_evaluate/_utils.py +298 -0
  19. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  20. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  21. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
  22. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  23. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
  24. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
  25. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  26. azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
  27. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
  28. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  47. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  48. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  49. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
  50. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  51. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  52. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  53. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  54. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  55. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  56. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  57. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  58. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  59. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
  60. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
  62. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
  64. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
  65. azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
  66. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
  67. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  68. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  69. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  70. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  72. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  73. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
  74. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
  75. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  76. azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
  77. azure/ai/evaluation/_exceptions.py +128 -0
  78. azure/ai/evaluation/_http_utils.py +466 -0
  79. azure/ai/evaluation/_model_configurations.py +123 -0
  80. azure/ai/evaluation/_user_agent.py +6 -0
  81. azure/ai/evaluation/_vendor/__init__.py +3 -0
  82. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  83. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  84. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  85. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  86. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  87. azure/ai/evaluation/_version.py +5 -0
  88. azure/ai/evaluation/py.typed +0 -0
  89. azure/ai/evaluation/simulator/__init__.py +16 -0
  90. azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
  91. azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
  92. azure/ai/evaluation/simulator/_constants.py +27 -0
  93. azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
  94. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  95. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  96. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  97. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  98. azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
  99. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  100. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  101. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
  102. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
  103. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  104. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
  105. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
  106. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
  107. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
  108. azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
  109. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  110. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
  111. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
  112. azure/ai/evaluation/simulator/_simulator.py +716 -0
  113. azure/ai/evaluation/simulator/_tracing.py +89 -0
  114. azure/ai/evaluation/simulator/_utils.py +132 -0
  115. azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
  116. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
  117. azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
  118. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
  119. azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
  120. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  121. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  122. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,82 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from ._evaluate._evaluate import evaluate
6
+ from ._evaluators._bleu import BleuScoreEvaluator
7
+ from ._evaluators._coherence import CoherenceEvaluator
8
+ from ._evaluators._content_safety import (
9
+ ContentSafetyEvaluator,
10
+ HateUnfairnessEvaluator,
11
+ SelfHarmEvaluator,
12
+ SexualEvaluator,
13
+ ViolenceEvaluator,
14
+ )
15
+ from ._evaluators._multimodal._content_safety_multimodal import (
16
+ ContentSafetyMultimodalEvaluator,
17
+ HateUnfairnessMultimodalEvaluator,
18
+ SelfHarmMultimodalEvaluator,
19
+ SexualMultimodalEvaluator,
20
+ ViolenceMultimodalEvaluator,
21
+ )
22
+ from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
23
+ from ._evaluators._f1_score import F1ScoreEvaluator
24
+ from ._evaluators._fluency import FluencyEvaluator
25
+ from ._evaluators._gleu import GleuScoreEvaluator
26
+ from ._evaluators._groundedness import GroundednessEvaluator
27
+ from ._evaluators._service_groundedness import GroundednessProEvaluator
28
+ from ._evaluators._meteor import MeteorScoreEvaluator
29
+ from ._evaluators._protected_material import ProtectedMaterialEvaluator
30
+ from ._evaluators._qa import QAEvaluator
31
+ from ._evaluators._relevance import RelevanceEvaluator
32
+ from ._evaluators._retrieval import RetrievalEvaluator
33
+ from ._evaluators._rouge import RougeScoreEvaluator, RougeType
34
+ from ._evaluators._similarity import SimilarityEvaluator
35
+ from ._evaluators._xpia import IndirectAttackEvaluator
36
+ from ._model_configurations import (
37
+ AzureAIProject,
38
+ AzureOpenAIModelConfiguration,
39
+ Conversation,
40
+ EvaluationResult,
41
+ EvaluatorConfig,
42
+ Message,
43
+ OpenAIModelConfiguration,
44
+ )
45
+
46
+ __all__ = [
47
+ "evaluate",
48
+ "CoherenceEvaluator",
49
+ "F1ScoreEvaluator",
50
+ "FluencyEvaluator",
51
+ "GroundednessEvaluator",
52
+ "GroundednessProEvaluator",
53
+ "RelevanceEvaluator",
54
+ "SimilarityEvaluator",
55
+ "QAEvaluator",
56
+ "ViolenceEvaluator",
57
+ "SexualEvaluator",
58
+ "SelfHarmEvaluator",
59
+ "HateUnfairnessEvaluator",
60
+ "ContentSafetyEvaluator",
61
+ "IndirectAttackEvaluator",
62
+ "BleuScoreEvaluator",
63
+ "GleuScoreEvaluator",
64
+ "MeteorScoreEvaluator",
65
+ "RetrievalEvaluator",
66
+ "RougeScoreEvaluator",
67
+ "RougeType",
68
+ "ProtectedMaterialEvaluator",
69
+ "AzureAIProject",
70
+ "AzureOpenAIModelConfiguration",
71
+ "OpenAIModelConfiguration",
72
+ "EvaluatorConfig",
73
+ "Conversation",
74
+ "Message",
75
+ "EvaluationResult",
76
+ "ContentSafetyMultimodalEvaluator",
77
+ "HateUnfairnessMultimodalEvaluator",
78
+ "SelfHarmMultimodalEvaluator",
79
+ "SexualMultimodalEvaluator",
80
+ "ViolenceMultimodalEvaluator",
81
+ "ProtectedMaterialMultimodalEvaluator",
82
+ ]
@@ -0,0 +1,16 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # To minimize relative imports in our evaluators, the scope of this package also includes anything
6
+ # that would have otherwise been a relative import scoped to single evaluator directories.
7
+
8
+ from . import constants
9
+ from .rai_service import evaluate_with_rai_service
10
+ from .utils import get_harm_severity_level
11
+
12
+ __all__ = [
13
+ "get_harm_severity_level",
14
+ "evaluate_with_rai_service",
15
+ "constants",
16
+ ]
@@ -0,0 +1,172 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import os
6
+ import functools
7
+ import inspect
8
+ import logging
9
+ import sys
10
+ from typing import Callable, Type, TypeVar, Union, overload
11
+
12
+ from typing_extensions import ParamSpec, TypeGuard
13
+
14
+ DOCSTRING_TEMPLATE = ".. note:: {0} {1}\n\n"
15
+ DOCSTRING_DEFAULT_INDENTATION = 8
16
+ EXPERIMENTAL_CLASS_MESSAGE = "This is an experimental class,"
17
+ EXPERIMENTAL_METHOD_MESSAGE = "This is an experimental method,"
18
+ EXPERIMENTAL_FIELD_MESSAGE = "This is an experimental field,"
19
+ EXPERIMENTAL_LINK_MESSAGE = (
20
+ "and may change at any time. Please see https://aka.ms/azuremlexperimental for more information."
21
+ )
22
+
23
+ _warning_cache = set()
24
+ module_logger = logging.getLogger(__name__)
25
+
26
+ P = ParamSpec("P")
27
+ T = TypeVar("T")
28
+
29
+
30
@overload
def experimental(wrapped: Type[T]) -> Type[T]: ...


@overload
def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ...


def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]:
    """Mark a class or a function as experimental.

    Classes are routed to ``_add_class_docstring`` and plain functions to
    ``_add_method_docstring``. Any other object is handed back untouched.

    :param wrapped: The class or function to tag as experimental
    :type wrapped: Union[Type[T], Callable[P, T]]
    :return: The tagged class or function (or the input itself when it is neither)
    :rtype: Union[Type[T], Callable[P, T]]
    """

    def _is_type(candidate: Union[Type[T], Callable[P, T]]) -> TypeGuard[Type[T]]:
        # TypeGuard lets static checkers narrow ``wrapped`` to a class in the branch below.
        return isinstance(candidate, type)

    if _is_type(wrapped):
        return _add_class_docstring(wrapped)
    return _add_method_docstring(wrapped) if inspect.isfunction(wrapped) else wrapped
55
+
56
+
57
def _add_class_docstring(cls: Type[T]) -> Type[T]:
    """Tag a class as experimental.

    The experimental note is prepended to the class docstring and ``__init__`` is
    replaced by a wrapper that logs a warning before delegating to the original.

    :param cls: The class to update (modified in place)
    :type cls: Type[T]
    :return: The same class object, after modification
    :rtype: Type[T]
    """

    P2 = ParamSpec("P2")

    def _add_class_warning(func: Callable[P2, None]) -> Callable[P2, None]:
        """Wrap ``__init__`` so that constructing the class logs the experimental warning.

        :param func: The original __init__ function
        :type func: Callable[P2, None]
        :return: The wrapping __init__
        :rtype: Callable[P2, None]
        """

        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            message = "Class {0}: {1} {2}".format(cls.__name__, EXPERIMENTAL_CLASS_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
            # Skip check runs first; the cache is only consulted (and updated) when not skipping.
            if not (_should_skip_warning() or _is_warning_cached(message)):
                module_logger.warning(message)
            return func(*args, **kwargs)

        return wrapped

    note = DOCSTRING_TEMPLATE.format(EXPERIMENTAL_CLASS_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
    existing_doc = cls.__doc__
    cls.__doc__ = _add_note_to_docstring(existing_doc, note) if existing_doc else note + ">"
    cls.__init__ = _add_class_warning(cls.__init__)  # type: ignore[method-assign]
    return cls
91
+
92
+
93
def _add_method_docstring(func: Callable[P, T]) -> Callable[P, T]:
    """Tag a function as experimental.

    The experimental note is prepended to the function's docstring, and the function
    is wrapped so that calling it logs a warning before delegating to the original.

    :param func: The function to update
    :type func: Callable[P, T]
    :return: A wrapper around ``func`` that emits the experimental warning
    :rtype: Callable[P,T]
    """
    note = DOCSTRING_TEMPLATE.format(EXPERIMENTAL_METHOD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
    existing_doc = func.__doc__
    # '>' is required. Otherwise the note section can't be generated
    func.__doc__ = _add_note_to_docstring(existing_doc, note) if existing_doc else note + ">"

    # The docstring is updated before wrapping so functools.wraps copies the new text.
    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> T:
        message = "Method {0}: {1} {2}".format(func.__name__, EXPERIMENTAL_METHOD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
        # Skip check runs first; the cache is only consulted (and updated) when not skipping.
        if not (_should_skip_warning() or _is_warning_cached(message)):
            module_logger.warning(message)
        return func(*args, **kwargs)

    return wrapped
116
+
117
+
118
def _add_note_to_docstring(doc_string: str, note: str) -> str:
    """Prepend the experimental note to a docstring.

    The original docstring is left-padded with as many spaces as its own body
    indentation (see ``_get_indentation_size``) before being appended to the note.

    :param doc_string: The docstring
    :type doc_string: str
    :param note: The note to add to the docstring
    :type note: str
    :return: The note followed by the padded original docstring
    :rtype: str
    """
    padding = " " * _get_indentation_size(doc_string)
    return "".join((note, padding, doc_string))
131
+
132
+
133
def _get_indentation_size(doc_string: str) -> int:
    """Return the smallest indentation among the non-blank lines after the first line.

    Tabs are expanded before measuring. When there is no non-blank line after the
    first one, ``DOCSTRING_DEFAULT_INDENTATION`` is returned instead.

    :param doc_string: The docstring
    :type doc_string: str
    :return: Minimum indentation (in columns) of the docstring body
    :rtype: int
    """
    body_lines = doc_string.expandtabs().splitlines()[1:]
    widths = [len(row) - len(row.lstrip()) for row in body_lines if row.lstrip()]
    if widths:
        return min(widths)
    return DOCSTRING_DEFAULT_INDENTATION
148
+
149
+
150
def _should_skip_warning():
    """Decide whether the experimental warning should be suppressed.

    The warning is skipped when either:

    1. the ``AI_EVALS_DISABLE_EXPERIMENTAL_WARNING`` environment variable is set to
       ``"true"`` (case-insensitive), or
    2. any function on the current call stack is named ``_from_rest_object``
       (i.e. when converting from a REST object to an SDK object).

    :return: True if the warning should be skipped, False otherwise.
    :rtype: bool
    """
    if os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true":
        # Warnings are disabled outright; no need to inspect the call stack.
        return True

    # Walk the raw frame chain rather than calling inspect.stack(): the latter builds a
    # FrameInfo record for every frame, which is needlessly expensive for a check that
    # runs on each call to an experimental class or method. Only the function name of
    # each frame is needed here.
    frame = inspect.currentframe()
    try:
        while frame is not None:
            if frame.f_code.co_name == "_from_rest_object":
                return True
            frame = frame.f_back
        return False
    finally:
        # Drop the frame reference promptly, as recommended by the inspect docs.
        del frame
164
+
165
+
166
def _is_warning_cached(warning_msg):
    """Report whether a warning message was already seen, recording it if not.

    The module-level cache ensures the same warning message is only printed once per
    session, which prevents duplicated warnings when a user calls an experimental
    method or class in a loop.

    :param warning_msg: The warning message to look up
    :return: True if the message was already in the cache, False if it was just added
    """
    already_seen = warning_msg in _warning_cache
    if not already_seen:
        _warning_cache.add(warning_msg)
    return already_seen
@@ -0,0 +1,72 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ from enum import Enum
5
+
6
+ from azure.core import CaseInsensitiveEnumMeta
7
+
8
+
9
+ PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency"]
10
+
11
+
12
class CommonConstants:
    """Constants shared across the package."""

    # Default timeout for HTTP calls (presumably in seconds -- confirm against callers).
    DEFAULT_HTTP_TIMEOUT = 60
16
+
17
+
18
class RAIService:
    """Constants used when talking to the RAI service."""

    # Overall timeout and sleep interval (presumably seconds -- confirm against rai_service usage).
    TIMEOUT = 1800
    SLEEP_TIME = 2
    # Severity threshold for harm scores (exact comparison semantics live in the callers).
    HARM_SEVERITY_THRESHOLD = 4
24
+
25
+
26
class HarmSeverityLevel(Enum):
    """Human-readable harm severity labels, listed from least to most severe."""

    VeryLow = "Very low"
    Low = "Low"
    Medium = "Medium"
    High = "High"
33
+
34
+
35
class Tasks:
    """Names of the annotation task types supported by the RAI service."""

    CONTENT_HARM = "content harm"
    PROTECTED_MATERIAL = "protected material"
    XPIA = "xpia"
    GROUNDEDNESS = "groundedness"
42
+
43
+
44
class _InternalAnnotationTasks:
    """Annotation task types reserved for internal use.

    These tasks are experimental and may change, or be migrated to the main
    Evaluation Metrics enum, over time.
    """

    ECI = "eci"
50
+
51
+
52
class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
    """Metric names used to tell the RAI service which metrics to request,
    and to decide how they are presented back to the user.

    Members are strings, and member lookup goes through ``CaseInsensitiveEnumMeta``.
    """

    # NOTE(review): both HATE_FAIRNESS and HATE_UNFAIRNESS exist; presumably one is a
    # legacy spelling kept for compatibility -- confirm before removing either.
    HATE_FAIRNESS = "hate_fairness"
    HATE_UNFAIRNESS = "hate_unfairness"
    VIOLENCE = "violence"
    SELF_HARM = "self_harm"
    SEXUAL = "sexual"
    PROTECTED_MATERIAL = "protected_material"
    XPIA = "xpia"
    GROUNDEDNESS = "generic_groundedness"
64
+
65
+
66
class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
    """Evaluation metrics that are not publicly supported.

    These metrics are experimental and may change, or be migrated to the main
    enum, over time.
    """

    ECI = "eci"
@@ -0,0 +1,89 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ import math
6
+ from typing import List, Callable, Any
7
+
8
+ from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
9
+
10
+
11
def list_sum(lst: List[float]) -> float:
    """Add up all values in a list of floats.

    :param lst: A list of floats.
    :type lst: List[float]
    :return: The sum of the values in the list.
    :rtype: float
    """
    total = sum(lst)
    return total
21
+
22
+
23
def list_mean(lst: List[float]) -> float:
    """Compute the arithmetic mean of a list of floats.

    An empty list results in a ``ZeroDivisionError``.

    :param lst: A list of floats.
    :type lst: List[float]
    :return: The mean of the values in the list.
    :rtype: float
    """
    count = len(lst)
    return sum(lst) / count
33
+
34
+
35
def list_mean_nan_safe(lst: List[float]) -> float:
    """Given a list of floats, remove all nan or None values, then calculate the mean of the remaining values.

    :param lst: A list of floats (entries may also be None or NaN).
    :type lst: List[float]
    :return: The mean of the values that are neither None nor NaN.
    :rtype: float
    :raises EvaluationException: If no usable value remains (the list is empty or
        contains only None/NaN entries).
    """

    # Filter first, then validate. The previous guard called math.isnan() directly on
    # every element and therefore crashed with TypeError as soon as it met a None.
    valid = [value for value in lst if value is not None and not math.isnan(value)]
    if not valid:
        msg = "All score values are NaN. The mean cannot be calculated."
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=ErrorTarget.CONVERSATION,
        )
    return sum(valid) / len(valid)
54
+
55
+
56
def apply_transform_nan_safe(lst: List[float], transform_fn: Callable[[float], Any]) -> List[Any]:
    """Given a list of floats, remove all nan or None values, then apply the inputted transform
    function to the remaining values, and return the resulting list of outputted values.

    :param lst: A list of floats (entries may also be None or NaN).
    :type lst: List[float]
    :param transform_fn: A function that produces something when applied to a float.
    :type transform_fn: Callable[[float], Any]
    :return: A list of the transformed values, in input order.
    :rtype: List[Any]
    :raises EvaluationException: If no usable value remains (the list is empty or
        contains only None/NaN entries).
    """

    # Filter first, then validate. The previous guard called math.isnan() directly on
    # every element and therefore crashed with TypeError as soon as it met a None.
    valid = [value for value in lst if value is not None and not math.isnan(value)]
    if not valid:
        # Message text is kept unchanged for compatibility with existing callers.
        msg = "All score values are NaN. The mean cannot be calculated."
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=ErrorTarget.CONVERSATION,
        )
    return [transform_fn(value) for value in valid]
78
+
79
+
80
def is_none_or_nan(val: float) -> bool:
    """Check for None or NaN without tripping over None.

    ``math.isnan`` raises an error when given None, so None is handled up front.

    :param val: The value to check.
    :type val: float
    :return: Whether the value is None or NaN.
    :rtype: bool
    """
    if val is None:
        return True
    return math.isnan(val)