azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.0.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +9 -5
- azure/ai/evaluation/_common/utils.py +24 -9
- azure/ai/evaluation/_constants.py +4 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +57 -39
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +34 -81
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
- azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +29 -74
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +33 -80
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +34 -83
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
- azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +16 -22
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -11
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +16 -90
- azure/ai/evaluation/_exceptions.py +0 -1
- azure/ai/evaluation/_model_configurations.py +36 -8
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
- azure/ai/evaluation/simulator/_simulator.py +19 -8
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/METADATA +59 -1
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/RECORD +38 -39
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104 (removed; full diff below, and see the migration sketch after this list)
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt +0 -0
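The listing above shows two structural refactors: the plural `_protected_materials` package is dropped in favor of the singular `_protected_material` module, and the retrieval evaluator moves out of `_chat/retrieval` into a top-level `_retrieval` package, while the `_chat` evaluator itself is removed. A minimal migration sketch follows; the 1.0.0b3 export name `ProtectedMaterialEvaluator` and its constructor shape are assumptions inferred from the renamed paths and the changed `__init__.py`, not confirmed by this diff.

# Hedged 1.0.0b2 -> 1.0.0b3 migration sketch; export name and constructor
# signature below are assumptions inferred from the renamed module paths.
from azure.identity import DefaultAzureCredential  # requires the azure-identity package
from azure.ai.evaluation import ProtectedMaterialEvaluator  # assumed 1.0.0b3 export

azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}

# Constructor mirrors the removed 1.0.0b2 evaluator; verify against the 1.0.0b3 API.
evaluator = ProtectedMaterialEvaluator(azure_ai_project, credential=DefaultAzureCredential())
result = evaluator(query="What is the capital of France?", response="Paris.")
print(result)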
azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py
DELETED
@@ -1,104 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from promptflow._utils.async_utils import async_run_allowing_running_loop
-
-from azure.ai.evaluation._common.constants import EvaluationMetrics
-from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
-from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-
-
-class _AsyncProtectedMaterialsEvaluator:
-    def __init__(self, azure_ai_project: dict, credential=None):
-        self._azure_ai_project = azure_ai_project
-        self._credential = credential
-
-    async def __call__(self, *, query: str, response: str, **kwargs):
-        """
-        Evaluates content according to this evaluator's metric.
-
-        :keyword query: The query to be evaluated.
-        :paramtype query: str
-        :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :return: The evaluation score computation based on the Content Safety metric (self.metric).
-        :rtype: Any
-        """
-        # Validate inputs
-        # Raises value error if failed, so execution alone signifies success.
-        if not (query and query.strip() and query != "None") or not (
-            response and response.strip() and response != "None"
-        ):
-            msg = "Both 'query' and 'response' must be non-empty strings."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                error_category=ErrorCategory.MISSING_FIELD,
-                error_blame=ErrorBlame.USER_ERROR,
-                error_target=ErrorTarget.PROTECTED_MATERIAL_EVALUATOR,
-            )
-
-        # Run score computation based on supplied metric.
-        result = await evaluate_with_rai_service(
-            metric_name=EvaluationMetrics.PROTECTED_MATERIAL,
-            query=query,
-            response=response,
-            project_scope=self._azure_ai_project,
-            credential=self._credential,
-        )
-        return result
-
-
-class ProtectedMaterialsEvaluator:
-    """
-    Initialize a protected materials evaluator to detect whether protected material
-    is present in your AI system's response. Outputs True or False with AI-generated reasoning.
-
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: ~azure.core.credentials.TokenCredential
-    :return: Whether or not protected material was found in the response, with AI-generated reasoning.
-    :rtype: Dict[str, str]
-
-    **Usage**
-
-    .. code-block:: python
-
-        azure_ai_project = {
-            "subscription_id": "<subscription_id>",
-            "resource_group_name": "<resource_group_name>",
-            "project_name": "<project_name>",
-        }
-        eval_fn = ProtectedMaterialsEvaluator(azure_ai_project)
-        result = eval_fn(query="What is the capital of France?", response="Paris.")
-
-    **Output format**
-
-    .. code-block:: python
-
-        {
-            "label": "False",
-            "reasoning": "This query does not contain any protected material."
-        }
-    """
-
-    def __init__(self, azure_ai_project: dict, credential=None):
-        self._async_evaluator = _AsyncProtectedMaterialsEvaluator(azure_ai_project, credential)
-
-    def __call__(self, *, query: str, response: str, **kwargs):
-        """
-        Evaluates protected materials content.
-
-        :keyword query: The query to be evaluated.
-        :paramtype query: str
-        :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :return: A dictionary containing a boolean label and reasoning.
-        :rtype: dict
-        """
-        return async_run_allowing_running_loop(self._async_evaluator, query=query, response=response, **kwargs)
-
-    def _to_async(self):
-        return self._async_evaluator
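For context, a short sketch of driving the removed 1.0.0b2 evaluator shown above: empty or "None" inputs raise EvaluationException before any service call, and valid inputs are scored by the Responsible AI service through evaluate_with_rai_service. The direct module import path matches the deleted file; the credential and project values are placeholders.

# Sketch of the removed 1.0.0b2 ProtectedMaterialsEvaluator (module deleted in 1.0.0b3).
# Placeholder project values; a real Azure AI project and credential are required
# for the service-backed scoring call to succeed.
from azure.identity import DefaultAzureCredential  # requires the azure-identity package
from azure.ai.evaluation._exceptions import EvaluationException
from azure.ai.evaluation._evaluators._protected_materials._protected_materials import (
    ProtectedMaterialsEvaluator,
)

azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}
evaluator = ProtectedMaterialsEvaluator(azure_ai_project, credential=DefaultAzureCredential())

try:
    # Empty inputs fail the validation block shown above before any service call.
    evaluator(query="", response="Paris.")
except EvaluationException as exc:
    print(f"rejected: {exc}")

# Valid inputs are scored by the Responsible AI service and return a label plus reasoning.
result = evaluator(query="What is the capital of France?", response="Paris.")
print(result)  # e.g. {"label": "False", "reasoning": "..."}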
{azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL
RENAMED
File without changes

{azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt
RENAMED
File without changes