azure-ai-evaluation 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/_azure/_clients.py +24 -8
- azure/ai/evaluation/_azure/_models.py +2 -2
- azure/ai/evaluation/_constants.py +18 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +2 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +3 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +69 -12
- azure/ai/evaluation/_evaluate/_utils.py +27 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +46 -25
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +69 -4
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +61 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +7 -1
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +5 -42
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +2 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +2 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +2 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +2 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +61 -68
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +45 -23
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +55 -34
- azure/ai/evaluation/_evaluators/_qa/_qa.py +32 -27
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +44 -23
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +41 -81
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +21 -13
- {azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/METADATA +71 -7
- {azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/RECORD +31 -29
- {azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluators/_qa/_qa.py
CHANGED

@@ -2,10 +2,11 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

-from
-from typing import Callable, Dict, List, Union
+from typing import Union

-from
+from typing_extensions import overload, override
+
+from azure.ai.evaluation._evaluators._common import MultiEvaluatorBase

 from .._coherence import CoherenceEvaluator
 from .._f1_score import F1ScoreEvaluator

@@ -15,7 +16,7 @@ from .._relevance import RelevanceEvaluator
 from .._similarity import SimilarityEvaluator


-class QAEvaluator:
+class QAEvaluator(MultiEvaluatorBase[Union[str, float]]):
     """
     Initialize a question-answer evaluator configured for a specific Azure OpenAI model.


@@ -46,9 +47,7 @@ class QAEvaluator:
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     def __init__(self, model_config, **kwargs):
-
-
-        self._evaluators: List[Union[Callable[..., Dict[str, Union[str, float]]], Callable[..., Dict[str, float]]]] = [
+        evaluators = [
             GroundednessEvaluator(model_config),
             RelevanceEvaluator(model_config),
             CoherenceEvaluator(model_config),

@@ -56,8 +55,31 @@
             SimilarityEvaluator(model_config),
             F1ScoreEvaluator(),
         ]
+        super().__init__(evaluators=evaluators, **kwargs)
+
+    @overload  # type: ignore
+    def __call__(self, *, query: str, response: str, context: str, ground_truth: str):
+        """
+        Evaluates question-answering scenario.
+
+        :keyword query: The query to be evaluated.
+        :paramtype query: str
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword context: The context to be evaluated.
+        :paramtype context: str
+        :keyword ground_truth: The ground truth to be evaluated.
+        :paramtype ground_truth: str
+        :return: The scores for QA scenario.
+        :rtype: Dict[str, Union[str, float]]
+        """

-
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
         """
         Evaluates question-answering scenario.


@@ -72,22 +94,5 @@ class QAEvaluator:
         :return: The scores for QA scenario.
         :rtype: Dict[str, Union[str, float]]
         """
-
-
-        with ThreadPoolExecutor() as executor:
-            futures = {
-                executor.submit(
-                    evaluator, query=query, response=response, context=context, ground_truth=ground_truth, **kwargs
-                ): evaluator
-                for evaluator in self._evaluators
-            }
-
-            # Collect results as they complete
-            for future in as_completed(futures):
-                results.update(future.result())
-        else:
-            for evaluator in self._evaluators:
-                result = evaluator(query=query, response=response, context=context, ground_truth=ground_truth, **kwargs)
-                results.update(result)
-
-        return results
+
+        return super().__call__(*args, **kwargs)
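A minimal usage sketch of the refactored `QAEvaluator`, matching the keyword-only overload added above; the endpoint, key, and deployment values are placeholders for a real Azure OpenAI configuration:

```python
from azure.ai.evaluation import QAEvaluator

# Placeholder Azure OpenAI configuration (AzureOpenAIModelConfiguration-shaped dict).
model_config = {
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "api_key": "<your-api-key>",
    "azure_deployment": "<your-deployment>",
}

qa_eval = QAEvaluator(model_config)

# Keyword-only call, as declared in the @overload added in 1.2.0.
scores = qa_eval(
    query="What is the capital of France?",
    response="Paris is the capital of France.",
    context="France's capital city is Paris.",
    ground_truth="Paris",
)
print(scores)  # Dict[str, Union[str, float]] aggregating the sub-evaluator results
```

Functionally this is the same call as in 1.1.0; only the dispatch now goes through `MultiEvaluatorBase` instead of a hand-rolled `ThreadPoolExecutor` loop.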
azure/ai/evaluation/_evaluators/_rouge/_rouge.py
CHANGED

@@ -3,9 +3,11 @@
 # ---------------------------------------------------------
 from enum import Enum

-from
+from typing import Dict
+from typing_extensions import overload, override

 from azure.ai.evaluation._vendor.rouge_score import rouge_scorer
+from azure.ai.evaluation._evaluators._common import EvaluatorBase


 class RougeType(Enum):

@@ -32,21 +34,7 @@ class RougeType(Enum):
     """Overlap of L-grams (L consecutive words) between generated and reference text."""


-class
-    def __init__(self, rouge_type: RougeType):
-        self._rouge_type = rouge_type
-
-    async def __call__(self, *, ground_truth: str, response: str, **kwargs):
-        scorer = rouge_scorer.RougeScorer(rouge_types=[self._rouge_type.value])
-        metrics = scorer.score(ground_truth, response)[self._rouge_type.value]
-        return {
-            "rouge_precision": metrics.precision,
-            "rouge_recall": metrics.recall,
-            "rouge_f1_score": metrics.fmeasure,
-        }
-
-
-class RougeScoreEvaluator:
+class RougeScoreEvaluator(EvaluatorBase):
     """
     Calculates the ROUGE score for a given response and ground truth.


@@ -76,10 +64,32 @@ class RougeScoreEvaluator:
     id = "azureml://registries/azureml/models/Rouge-Score-Evaluator/versions/3"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

+    @override
     def __init__(self, rouge_type: RougeType):
-        self.
+        self._rouge_type = rouge_type
+        super().__init__()
+
+    @override
+    async def _do_eval(self, eval_input: Dict) -> Dict[str, float]:
+        """Produce a rouge score evaluation result.

-
+        :param eval_input: The input to the evaluation function.
+        :type eval_input: Dict
+        :return: The evaluation result.
+        :rtype: Dict
+        """
+        ground_truth = eval_input["ground_truth"]
+        response = eval_input["response"]
+        scorer = rouge_scorer.RougeScorer(rouge_types=[self._rouge_type.value])
+        metrics = scorer.score(ground_truth, response)[self._rouge_type.value]
+        return {
+            "rouge_precision": metrics.precision,
+            "rouge_recall": metrics.recall,
+            "rouge_f1_score": metrics.fmeasure,
+        }
+
+    @overload  # type: ignore
+    def __call__(self, *, ground_truth: str, response: str) -> Dict[str, float]:
         """
         Evaluate the ROUGE score between the response and the ground truth.


@@ -90,9 +100,20 @@ class RougeScoreEvaluator:
         :return: The ROUGE score.
         :rtype: Dict[str, float]
         """
-        return async_run_allowing_running_loop(
-            self._async_evaluator, ground_truth=ground_truth, response=response, **kwargs
-        )

-
-
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
+        """
+        Evaluate route score.
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword ground_truth: The ground truth to be compared against.
+        :paramtype ground_truth: str
+        :return: The ROUGE score.
+        :rtype: Dict[str, float]
+        """
+        return super().__call__(*args, **kwargs)
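A short usage sketch of the refactored evaluator. It assumes the `RougeType.ROUGE_L` enum member (only its docstring is visible in the hunk above); ROUGE needs no model configuration since it is computed locally:

```python
from azure.ai.evaluation import RougeScoreEvaluator, RougeType

rouge_eval = RougeScoreEvaluator(rouge_type=RougeType.ROUGE_L)

# Keyword-only call matching the @overload added in 1.2.0.
result = rouge_eval(
    ground_truth="The capital of France is Paris.",
    response="Paris is the capital of France.",
)
# _do_eval above returns rouge_precision, rouge_recall and rouge_f1_score.
print(result)
```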
azure/ai/evaluation/_evaluators/_similarity/_similarity.py
CHANGED

@@ -2,83 +2,15 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

-import math
 import os
-import
+from typing import Dict

-from
-from promptflow.core import AsyncPrompty
+from typing_extensions import overload, override

-from azure.ai.evaluation.
+from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase

-from ..._common.utils import construct_prompty_model_config, validate_model_config

-
-    from ..._user_agent import USER_AGENT
-except ImportError:
-    USER_AGENT = "None"
-
-
-class _AsyncSimilarityEvaluator:
-    # Constants must be defined within eval's directory to be save/loadable
-    _PROMPTY_FILE = "similarity.prompty"
-    _LLM_CALL_TIMEOUT = 600
-    _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
-
-    def __init__(self, model_config: dict):
-        prompty_model_config = construct_prompty_model_config(
-            validate_model_config(model_config),
-            self._DEFAULT_OPEN_API_VERSION,
-            USER_AGENT,
-        )
-
-        current_dir = os.path.dirname(__file__)
-        prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
-        self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)
-
-    async def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs):
-        """
-        Evaluate similarity.
-
-        :keyword query: The query to be evaluated.
-        :paramtype query: str
-        :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :keyword ground_truth: The ground truth to be evaluated.
-        :paramtype ground_truth: str
-        :return: The similarity score.
-        :rtype: Dict[str, float]
-        """
-        # Validate input parameters
-        query = str(query or "")
-        response = str(response or "")
-        ground_truth = str(ground_truth or "")
-
-        if not (query.strip() and response.strip() and ground_truth.strip()):
-            msg = "'query', 'response' and 'ground_truth' must be non-empty strings."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                error_category=ErrorCategory.MISSING_FIELD,
-                error_blame=ErrorBlame.USER_ERROR,
-                error_target=ErrorTarget.SIMILARITY_EVALUATOR,
-            )
-
-        # Run the evaluation flow
-        llm_output = await self._flow(
-            query=query, response=response, ground_truth=ground_truth, timeout=self._LLM_CALL_TIMEOUT, **kwargs
-        )
-
-        score = math.nan
-        if llm_output:
-            match = re.search(r"\d", llm_output)
-            if match:
-                score = float(match.group())
-
-        return {"similarity": float(score), "gpt_similarity": float(score)}
-
-
-class SimilarityEvaluator:
+class SimilarityEvaluator(PromptyEvaluatorBase):
     """
     Evaluates similarity score for a given query, response, and ground truth.


@@ -113,13 +45,27 @@ class SimilarityEvaluator:
     however, it is recommended to use the new key moving forward as the old key will be deprecated in the future.
     """

-
+    # Constants must be defined within eval's directory to be save/loadable
+
+    _PROMPTY_FILE = "similarity.prompty"
+    _RESULT_KEY = "similarity"
+
+    id = "similarity"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

+    @override
     def __init__(self, model_config):
-
-
-
+        current_dir = os.path.dirname(__file__)
+        prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
+        super().__init__(model_config=model_config, prompty_file=prompty_path, result_key=self._RESULT_KEY)
+
+    # Ignoring a mypy error about having only 1 overload function.
+    # We want to use the overload style for all evals, even single-inputs. This is both to make
+    # refactoring to multi-input styles easier, stylistic consistency consistency across evals,
+    # and due to the fact that non-overloaded syntax now causes various parsing issues that
+    # we don't want to deal with.
+    @overload  # type: ignore
+    def __call__(self, *, query: str, response: str, ground_truth: str) -> Dict[str, float]:
         """
         Evaluate similarity.


@@ -132,9 +78,23 @@ class SimilarityEvaluator:
         :return: The similarity score.
         :rtype: Dict[str, float]
         """
-        return async_run_allowing_running_loop(
-            self._async_evaluator, query=query, response=response, ground_truth=ground_truth, **kwargs
-        )

-
-
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
+        """
+        Evaluate similarity.
+
+        :keyword query: The query to be evaluated.
+        :paramtype query: str
+        :keyword response: The response to be evaluated.
+        :paramtype response: str
+        :keyword ground_truth: The ground truth to be evaluated.
+        :paramtype ground_truth: str
+        :return: The similarity score.
+        :rtype: Dict[str, float]
+        """
+        return super().__call__(*args, **kwargs)
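A minimal sketch of calling the refactored `SimilarityEvaluator`, using the same placeholder Azure OpenAI settings as the `QAEvaluator` sketch above; per the class docstring context in the hunk, the result is keyed on `similarity`, with the older `gpt_similarity` key slated for deprecation:

```python
from azure.ai.evaluation import SimilarityEvaluator

model_config = {  # placeholder Azure OpenAI configuration, as in the QAEvaluator sketch
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "api_key": "<your-api-key>",
    "azure_deployment": "<your-deployment>",
}

similarity_eval = SimilarityEvaluator(model_config)

result = similarity_eval(
    query="What is the capital of France?",
    response="Paris is the capital of France.",
    ground_truth="Paris",
)
print(result)  # e.g. {"similarity": 5.0, ...} on the evaluator's 1-5 scale
```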
azure/ai/evaluation/_version.py
CHANGED
azure/ai/evaluation/simulator/_simulator.py
CHANGED

@@ -157,7 +157,7 @@ class Simulator:
                 f"You have specified 'num_queries' < len('tasks') ({num_queries} < {len(tasks)}). "
                 f"Only the first {num_queries} lines of the specified tasks will be simulated."
             )
-
+
         max_conversation_turns *= 2  # account for both user and assistant turns

         prompty_model_config = self.model_config

@@ -586,7 +586,10 @@
         for i, query_response_pair in enumerate(query_responses):
             query = query_response_pair["q"]
             response = query_response_pair["r"]
-
+            try:
+                task = tasks[i]
+            except IndexError:
+                task = None

             conversation = await self._complete_conversation(
                 conversation_starter=query,

@@ -621,7 +624,7 @@
         *,
         conversation_starter: str,
         max_conversation_turns: int,
-        task: str,
+        task: Optional[str],
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_options: Dict[str, Any],
         target: Callable,

@@ -659,16 +662,21 @@
                 user_simulator_prompty_options=user_simulator_prompty_options,
             )
             if len(conversation_history) == 0:
-
-
-
-
-
-
-
-
-
-
+                if task:
+                    conversation_starter_from_simulated_user = await user_flow(
+                        task=task,
+                        conversation_history=[
+                            {
+                                "role": "assistant",
+                                "content": conversation_starter,
+                            }
+                        ],
+                        action="rewrite the assistant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.",
+                    )
+                else:
+                    conversation_starter_from_simulated_user = {
+                        "content": conversation_starter,
+                    }
             else:
                 conversation_starter_from_simulated_user = await user_flow(
                     task=task,
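Taken together, these hunks make `task` optional end to end: when `tasks` runs out (or is empty), the missing task falls back to `None` and the conversation starter is used as-is instead of being rewritten by the user-simulation prompty. A rough, task-free usage sketch, assuming the documented callback-style target and placeholder model settings:

```python
import asyncio
from azure.ai.evaluation.simulator import Simulator

model_config = {  # placeholder Azure OpenAI configuration
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "api_key": "<your-api-key>",
    "azure_deployment": "<your-deployment>",
}

# Hypothetical stand-in for the application under test.
async def echo_target(messages, stream=False, session_state=None, context=None):
    latest = messages["messages"][-1]["content"]
    messages["messages"].append({"role": "assistant", "content": f"You said: {latest}", "context": ""})
    return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}

async def main():
    simulator = Simulator(model_config=model_config)
    outputs = await simulator(
        target=echo_target,
        text="Background text the simulator turns into queries.",
        num_queries=2,
        max_conversation_turns=2,
        tasks=[],  # task-free mode: missing tasks no longer raise IndexError
    )
    print(len(outputs))

asyncio.run(main())
```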
{azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.
+Version: 1.2.0
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation

@@ -13,17 +13,16 @@ Classifier: Development Status :: 5 - Production/Stable
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: NOTICE.txt
-Requires-Dist: promptflow-devkit >=1.
-Requires-Dist: promptflow-core >=1.
+Requires-Dist: promptflow-devkit >=1.17.1
+Requires-Dist: promptflow-core >=1.17.1
 Requires-Dist: pyjwt >=2.8.0
 Requires-Dist: azure-identity >=1.16.0
 Requires-Dist: azure-core >=1.30.2

@@ -54,7 +53,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:

 ### Prerequisites

-- Python 3.
+- Python 3.9 or later is required to use this package.
 - [Optional] You must have [Azure AI Project][ai_project] or [Azure Open AI][azure_openai] to use AI-assisted evaluators

 ### Install the package

@@ -378,11 +377,76 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

 # Release History

-## 1.
+## 1.2.0 (2025-01-27)
+
+### Features Added
+- CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
+
+### Breaking Changes
+- `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in next release.

 ### Bugs Fixed
 - Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
 - Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
+- Fixed the non adversarial simulator to run in task-free mode
+- Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
+  main score when aggregating per-turn evaluations from a conversation into an overall
+  evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
+- Fixed bug in non adversarial simulator sample where `tasks` undefined
+
+### Other Changes
+- Changed minimum required python version to use this package from 3.8 to 3.9
+- Stop dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
+- Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
+  environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
+
+## 1.1.0 (2024-12-12)
+
+### Features Added
+- Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
+
+```python
+evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
+conversation = {
+    "messages": [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are an AI assistant that understands images."}
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Can you describe this image?"},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
+                    },
+                },
+            ],
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
+                }
+            ],
+        },
+    ]
+}
+print("Calling Content Safety Evaluator for multi-modal")
+score = evaluator(conversation=conversation)
+```
+
+- Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
+
+### Bugs Fixed
+- Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
+- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results

 ## 1.0.1 (2024-11-15)

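To make the CSV note in the 1.2.0 release notes concrete, here is a minimal sketch: `evaluate()` is pointed at a `.csv` file instead of a `.jsonl` one, with a header row whose column names match the evaluator inputs. The file name and column names here are hypothetical:

```python
from azure.ai.evaluation import evaluate, F1ScoreEvaluator, BleuScoreEvaluator

# qa_data.csv (hypothetical), with a header row: query,response,ground_truth
result = evaluate(
    data="qa_data.csv",
    evaluators={
        "f1": F1ScoreEvaluator(),
        "bleu": BleuScoreEvaluator(),
    },
)
print(result["metrics"])
```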
{azure_ai_evaluation-1.1.0.dist-info → azure_ai_evaluation-1.2.0.dist-info}/RECORD
CHANGED

@@ -1,14 +1,14 @@
 azure/ai/evaluation/__init__.py,sha256=MFxJRoKfSsP_Qlfq0FwynxNf4csNAfTYPQX7jdXc9RU,2757
-azure/ai/evaluation/_constants.py,sha256=
+azure/ai/evaluation/_constants.py,sha256=a7eCgdG6Kid79ebAMu0rPNH7foRF5Aii0K5YQI6cNPc,2765
 azure/ai/evaluation/_exceptions.py,sha256=MsTbgsPGYPzIxs7MyLKzSeiVKEoCxYkVjONzNfv2tXA,5162
 azure/ai/evaluation/_http_utils.py,sha256=1bGce6pKAL-vmaUGRPxVX7DVO05XVQ8YPIwIQ3q7mfA,17221
 azure/ai/evaluation/_model_configurations.py,sha256=MNN6cQlz7P9vNfHmfEKsUcly3j1FEOEFsA8WV7GPuKQ,4043
 azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
-azure/ai/evaluation/_version.py,sha256=
+azure/ai/evaluation/_version.py,sha256=aIrrVLGzX0UDxMjpkbe8HTOCqRr6Y9R8tC8XGAOocbE,199
 azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/_azure/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
-azure/ai/evaluation/_azure/_clients.py,sha256=
-azure/ai/evaluation/_azure/_models.py,sha256=
+azure/ai/evaluation/_azure/_clients.py,sha256=N1V-LyQkItPuoKl0aieypFPdGSRSld9lQqH1x-n3L7U,9119
+azure/ai/evaluation/_azure/_models.py,sha256=7EHmrCAFOscYY3H90wfmdPPdy0sqnOlgGFvraq_L-2Q,12517
 azure/ai/evaluation/_azure/_token_manager.py,sha256=1NZHwgEc9BMXWPz5Ear_J5-oYjouD77crLHHqNLldEw,5193
 azure/ai/evaluation/_common/__init__.py,sha256=LHTkf6dMLLxikrGNgbUuREBVQcs4ORHR6Eryo4bm9M8,586
 azure/ai/evaluation/_common/_experimental.py,sha256=GVtSn9r1CeR_yEa578dJVNDJ3P24eqe8WYdH7llbiQY,5694

@@ -17,10 +17,10 @@ azure/ai/evaluation/_common/math.py,sha256=d4bwWe35_RWDIZNcbV1BTBbHNx2QHQ4-I3Eof
 azure/ai/evaluation/_common/rai_service.py,sha256=DcakzdOour9qNdMXU-8UFfvLb12oexAoiJXG8XFTRBs,26462
 azure/ai/evaluation/_common/utils.py,sha256=MQIZs95gH5je1L-S3twa_WQi071zRu0Dv54lzCI7ZgU,17642
 azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
-azure/ai/evaluation/_evaluate/_eval_run.py,sha256=
-azure/ai/evaluation/_evaluate/_evaluate.py,sha256=
-azure/ai/evaluation/_evaluate/_utils.py,sha256=
-azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=
+azure/ai/evaluation/_evaluate/_eval_run.py,sha256=QBtNBwUxqxsIVmKPU-_H8MDFkF4s_bW7arQYXAniRpo,21965
+azure/ai/evaluation/_evaluate/_evaluate.py,sha256=hj1HG9WCjbvAk8iB0MwnVoV-ceQYKKecfyVTlbc3y4A,38934
+azure/ai/evaluation/_evaluate/_utils.py,sha256=sKj_4iN-QjrRlEkiZwA9UNiWozS4LgJcUZ6AWdHrTY4,14231
+azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=Z-TQdSxKTn0bjsF0YosIJMbQFQHDUv_b9zCBu1TeogQ,474
 azure/ai/evaluation/_evaluate/_batch_run/code_client.py,sha256=XQLaXfswF6ReHLpQthHLuLLa65Pts8uawGp7kRqmMDs,8260
 azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py,sha256=p3Bsg_shGs5RXvysOlvo0CQb4Te5herSvX1OP6ylFUQ,3543
 azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py,sha256=T_QRHScDMBM4O6ejkkKdBmHPjH2NOF6owW48aVUYF6k,3775

@@ -28,35 +28,37 @@ azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py,sha256=SMos3bVmD7
 azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=fhLqE41qxdjfBOGi23cpk6QgUe-s1Fw2xhAAUjNESF0,7045
 azure/ai/evaluation/_evaluators/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
 azure/ai/evaluation/_evaluators/_bleu/__init__.py,sha256=quKKO0kvOSkky5hcoNBvgBuMeeVRFCE9GSv70mAdGP4,260
-azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=
+azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=Px3KxTyNIuxy-4U3SE4XJHCd4r144JeVrlIGMdbaqBk,3425
 azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0UsRNWMIXiea2lLtPPXNGvkJzQ0,258
 azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=uG9hX2XWkMREKfMAWRoosjicoI4Lg3ptR3UcLEgKd0c,4643
 azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=ANvh9mDFW7KMejrgdWqBLjj4SIqEO5WW9gg5pE0RLJk,6798
-azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=
-azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=
+azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=xAymP_CZy4aPzWplMdXgQUQVDIUEMI-0nbgdm_umFYY,498
+azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=LtlTsA0TUywMXEYj3mVggv43G0TfKnNkDG6ZgA_dWa4,23328
+azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py,sha256=6WFmFMsobJjju3wzVFKx7EjuHqbBV9YXzlhbwu5vzio,2509
 azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=hvJD7jR2ESePkRPN17ytoFhFiS0iTotOfeqmTwG2IMs,4531
-azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=
+azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=1ZwWu2dwN2y6bVOU3Ws4VvJoMVQ80tzYNutSEfmpYmg,7830
+azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py,sha256=gjDBjRxJKwaHbshWH0j2idjlzfzNMnT9a9RL0fQiKeM,2129
 azure/ai/evaluation/_evaluators/_content_safety/__init__.py,sha256=PEYMIybfP64f7byhuTaiq4RiqsYbjqejpW1JsJIG1jA,556
-azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py,sha256=
-azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py,sha256=
-azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py,sha256=
-azure/ai/evaluation/_evaluators/_content_safety/_sexual.py,sha256=
-azure/ai/evaluation/_evaluators/_content_safety/_violence.py,sha256=
+azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py,sha256=XKnIlxbzkB65cRXXcOGsv0W37QKxo_jsHbR3gijMZ78,4654
+azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py,sha256=LcnJuePAwByoaXAQ5CVKnkO2IVCCRdVnFTUYbOyQTbs,6043
+azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py,sha256=Nl_hTIRXO_UdAjUatPzbCTextsngkgib1ECzsmDHUvE,5280
+azure/ai/evaluation/_evaluators/_content_safety/_sexual.py,sha256=z8bDdkZHW09D6NTY9mlK2abNMOy9hRAJwwTQs5vjvAc,5520
+azure/ai/evaluation/_evaluators/_content_safety/_violence.py,sha256=z9dM3GOBSIw_WoEELPHRE3DSK3ol7MZbDkFJyuYENVk,5591
 azure/ai/evaluation/_evaluators/_eci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/_evaluators/_eci/_eci.py,sha256=a36sLZPHKi3YAdl0JvpL6vboZMqgGjnmz0qZ-o8vcWY,2934
 azure/ai/evaluation/_evaluators/_f1_score/__init__.py,sha256=aEVbO7iMoF20obdpLQKcKm69Yyu3mYnblKELLqu8OGI,260
-azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py,sha256=
+azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py,sha256=nDUAz-vmkIR0Sj7JfMm3mVHfE3XnKrZnTzOUa6QKskk,5399
 azure/ai/evaluation/_evaluators/_fluency/__init__.py,sha256=EEJw39xRa0bOAA1rELTTKXQu2s60n_7CZQRD0Gu2QVw,259
 azure/ai/evaluation/_evaluators/_fluency/_fluency.py,sha256=mHQCismdL4cCeANcqWrDHCiVgr4UAWj0yIYJXt2pFDA,4399
 azure/ai/evaluation/_evaluators/_fluency/fluency.prompty,sha256=n9v0W9eYwgIO-JSsLTSKEM_ApJuxxuKWQpNblrTEkFY,4861
 azure/ai/evaluation/_evaluators/_gleu/__init__.py,sha256=Ae2EvQ7gqiYAoNO3LwGIhdAAjJPJDfT85rQGKrRrmbA,260
-azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=
+azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=E_HeUuDAW2pPhsbaWLHMMxqgUxPOgBv2Bnr_Z9M6AAs,3359
 azure/ai/evaluation/_evaluators/_groundedness/__init__.py,sha256=UYNJUeRvBwcSVFyZpdsf29un5eyaDzYoo3QvC1gvlLg,274
 azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=Zil5S7BXaVvW2wBUlsF3oGzZLOYrvSzGAY4TqKfFUX8,6876
 azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty,sha256=v7TOm75DyW_1gOU6gSiZoPcRnHcJ65DrzR2cL_ucWDY,5814
 azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty,sha256=8kNShdfxQvkII7GnqjmdqQ5TNelA2B6cjnqWZk8FFe4,5296
 azure/ai/evaluation/_evaluators/_meteor/__init__.py,sha256=209na3pPsdmcuYpYHUYtqQybCpc3yZkc93HnRdicSlI,266
-azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=
+azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=OpugAjIgcTcNQ6g6Rks_8GVhcRiH524PbmBKH3bTefs,4369
 azure/ai/evaluation/_evaluators/_multimodal/__init__.py,sha256=tPvsY0nv8T3VtiiAwJM6wT5A9FhKP2XXwUlCH994xl4,906
 azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py,sha256=x0l6eLQhxVP85jEyGfFCl27C2okMgD0S3aJ_qrgB3Q8,5219
 azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py,sha256=X2IVw0YvymDD3e4Vx-TfjqgqtYiAKVhUumjBowCpOmA,2441

@@ -68,7 +70,7 @@ azure/ai/evaluation/_evaluators/_multimodal/_violence.py,sha256=t1h3bY6N7SwlSgP_
 azure/ai/evaluation/_evaluators/_protected_material/__init__.py,sha256=eRAQIU9diVXfO5bp6aLWxZoYUvOsrDIfy1gnDOeNTiI,109
 azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py,sha256=IABs1YMBZdIi1u57dPi-aQpSiPWIGxEZ4hyt97jvdNA,4604
 azure/ai/evaluation/_evaluators/_qa/__init__.py,sha256=bcXfT--C0hjym2haqd1B2-u9bDciyM0ThOFtU1Q69sk,244
-azure/ai/evaluation/_evaluators/_qa/_qa.py,sha256=
+azure/ai/evaluation/_evaluators/_qa/_qa.py,sha256=HG4JiSt5S20D70LmzW8t24qWg5_uiIKwIxjJ13ygfDo,3670
 azure/ai/evaluation/_evaluators/_relevance/__init__.py,sha256=JlxytW32Nl8pbE-fI3GRpfgVuY9EG6zxIAn5VZGSwyc,265
 azure/ai/evaluation/_evaluators/_relevance/_relevance.py,sha256=S1J5BR1-ZyCLQOTbdAHLDzzY1ccVnPyy9uVUlivmCx0,5287
 azure/ai/evaluation/_evaluators/_relevance/relevance.prompty,sha256=VHKzVlC2Cv1xuholgIGmerPspspAI0t6IgJ2cxOuYDE,4811

@@ -76,11 +78,11 @@ azure/ai/evaluation/_evaluators/_retrieval/__init__.py,sha256=kMu47ZyTZ7f-4Yh6H3
 azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py,sha256=fmd8zNOVSGQGT5icSAI6PwgnS7kKz_ZMKMnxKIchYl8,5085
 azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty,sha256=_YVoO4Gt_WD42bUcj5n6BDW0dMUqNf0yF3Nj5XMOX2c,16490
 azure/ai/evaluation/_evaluators/_rouge/__init__.py,sha256=kusCDaYcXogDugGefRP8MQSn9xv107oDbrMCqZ6K4GA,291
-azure/ai/evaluation/_evaluators/_rouge/_rouge.py,sha256=
+azure/ai/evaluation/_evaluators/_rouge/_rouge.py,sha256=xTsAF9em2QpWcgCHOmDCEcuRwrob5kPUUpkgul9E5jA,4642
 azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py,sha256=0DODUGTOgaYyFbO9_zxuwifixDL3SIm3EkwP1sdwn6M,288
 azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py,sha256=GPvufAgTnoQ2HYs6Xnnpmh23n5E3XxnUV0NGuwjDyU0,6648
 azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTkRHG5NpN5s9XoiTSN4I8POWEkLA,268
-azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=
+azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=AeqJ_OJUAsdu9Cac4OLVPF2zbrBmpXD7_5oOs_cxKsk,4244
 azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
 azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
 azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=Nv14lU7jN0yXKbHgHRXMHEy6pn1rXmesBOYI2Ge9ewk,5849

@@ -96,7 +98,7 @@ azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=FPZ3OdpGuwCHDVoOZ
 azure/ai/evaluation/simulator/_constants.py,sha256=nCL7_1BnYh6k0XvxudxsDVMbiG9MMEvYw5wO9FZHHZ8,857
 azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=FTtWf655dHJF5FLJi0xGSBgIlGWNiVWyqaLDJSud9XA,10199
 azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=nweIU_AkUIR50qLQpjmljf_OkpsCPth2Ebf4vusygCA,10226
-azure/ai/evaluation/simulator/_simulator.py,sha256=
+azure/ai/evaluation/simulator/_simulator.py,sha256=LBzez7qvObpVjTwmlGS_PfhDLo8pRknh5epra2yo9X8,36484
 azure/ai/evaluation/simulator/_tracing.py,sha256=frZ4-usrzINast9F4-ONRzEGGox71y8bYw0UHNufL1Y,3069
 azure/ai/evaluation/simulator/_utils.py,sha256=16NltlywpbMtoFtULwTKqeURguIS1kSKSo3g8uKV8TA,5181
 azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=s8djzJ58_-CiIA8xHB-SbgeZaq1F7ftrc3qJbpUpUdg,17853

@@ -116,8 +118,8 @@ azure/ai/evaluation/simulator/_model_tools/models.py,sha256=bfVm0PV3vfH_8DkdmTMZ
 azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=2BzSqDDYilDushvR56vMRDmqFIaIYAewdUlUZg_elMg,2182
 azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=NE6lH4bfmibgMn4NgJtm9_l3PMoHSFrfjjosDJEKM0g,939
-azure_ai_evaluation-1.
-azure_ai_evaluation-1.
-azure_ai_evaluation-1.
-azure_ai_evaluation-1.
-azure_ai_evaluation-1.
+azure_ai_evaluation-1.2.0.dist-info/METADATA,sha256=NM0mPj138_k-6vWuKICoqUBtDq-TaWGXOeaCLpyT2IU,32156
+azure_ai_evaluation-1.2.0.dist-info/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
+azure_ai_evaluation-1.2.0.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
+azure_ai_evaluation-1.2.0.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
+azure_ai_evaluation-1.2.0.dist-info/RECORD,,