azure-ai-evaluation 1.13.3__py3-none-any.whl → 1.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +2 -0
- azure/ai/evaluation/_common/constants.py +1 -1
- azure/ai/evaluation/_constants.py +0 -1
- azure/ai/evaluation/_eval_mapping.py +8 -4
- azure/ai/evaluation/_evaluate/_evaluate.py +2 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +5 -2
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +2 -2
- azure/ai/evaluation/_evaluators/{_tool_success → _tool_call_success}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_tool_success/_tool_success.py → _tool_call_success/_tool_call_success.py} +27 -22
- azure/ai/evaluation/_evaluators/{_tool_success/tool_success.prompty → _tool_call_success/tool_call_success.prompty} +1 -1
- azure/ai/evaluation/_exceptions.py +1 -1
- azure/ai/evaluation/_version.py +1 -1
- {azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +13 -1
- {azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/RECORD +17 -17
- {azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/licenses/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/__init__.py
CHANGED

@@ -33,6 +33,7 @@ from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
 from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
 from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
 from ._evaluators._tool_output_utilization import _ToolOutputUtilizationEvaluator
+from ._evaluators._tool_call_success import _ToolCallSuccessEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,

@@ -133,6 +134,7 @@ __all__ = [
     "UngroundedAttributesEvaluator",
     "ToolCallAccuracyEvaluator",
     "_ToolOutputUtilizationEvaluator",
+    "_ToolCallSuccessEvaluator",
     "AzureOpenAIGrader",
     "AzureOpenAILabelGrader",
     "AzureOpenAIStringCheckGrader",
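
With this export in place, the experimental evaluator is importable from the package root in 1.13.5. A minimal construction sketch (the endpoint, key, and deployment below are placeholders, not values from this diff):

# Sketch: construct the newly exported experimental evaluator (the leading
# underscore marks it as private/experimental API that may change without notice).
from azure.ai.evaluation import AzureOpenAIModelConfiguration, _ToolCallSuccessEvaluator

model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
    api_key="<your-api-key>",                                   # placeholder
    azure_deployment="<your-deployment>",                       # placeholder
)
evaluator = _ToolCallSuccessEvaluator(model_config)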

azure/ai/evaluation/_constants.py
CHANGED

@@ -143,7 +143,6 @@ class _EvaluatorMetricMapping:
     "hate_unfairness": ["hate_unfairness"],
     "tool_input_accuracy": ["tool_input_accuracy"],
     "task_completion": ["task_completion"],
-    "tool_success": ["tool_success"],
     "tool_call_success": ["tool_call_success"],
     "tool_selection": ["tool_selection"],
     "tool_output_utilization": ["tool_output_utilization"],

azure/ai/evaluation/_eval_mapping.py
CHANGED

@@ -12,10 +12,14 @@
 # Import all evals
 from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
 from azure.ai.evaluation._evaluators._task_completion import _TaskCompletionEvaluator
-from azure.ai.evaluation._evaluators._tool_input_accuracy import _ToolInputAccuracyEvaluator
+from azure.ai.evaluation._evaluators._tool_input_accuracy import (
+    _ToolInputAccuracyEvaluator,
+)
 from azure.ai.evaluation._evaluators._tool_selection import _ToolSelectionEvaluator
-from azure.ai.evaluation._evaluators._tool_success import _ToolSuccessEvaluator
-from azure.ai.evaluation._evaluators._task_navigation_efficiency import _TaskNavigationEfficiencyEvaluator
+from azure.ai.evaluation._evaluators._tool_call_success import _ToolCallSuccessEvaluator
+from azure.ai.evaluation._evaluators._task_navigation_efficiency import (
+    _TaskNavigationEfficiencyEvaluator,
+)
 from azure.ai.evaluation import (
     BleuScoreEvaluator,
     CodeVulnerabilityEvaluator,

@@ -77,7 +81,7 @@ EVAL_CLASS_MAP = {
     ToolCallAccuracyEvaluator: "tool_call_accuracy",
     _ToolInputAccuracyEvaluator: "tool_input_accuracy",
     _ToolSelectionEvaluator: "tool_selection",
-    _ToolSuccessEvaluator: "tool_success",
+    _ToolCallSuccessEvaluator: "tool_call_success",
     UngroundedAttributesEvaluator: "ungrounded_attributes",
     ViolenceEvaluator: "violence",
 }
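
The mapping above is the user-visible contract here: EVAL_CLASS_MAP keys evaluator classes to metric names, so the renamed class now resolves to "tool_call_success". A minimal check as a sketch (these are private module paths, so illustrative rather than supported API):

# Sketch: the renamed evaluator class resolves to its new metric name.
from azure.ai.evaluation import _ToolCallSuccessEvaluator
from azure.ai.evaluation._eval_mapping import EVAL_CLASS_MAP

assert EVAL_CLASS_MAP[_ToolCallSuccessEvaluator] == "tool_call_success"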

azure/ai/evaluation/_evaluate/_evaluate.py
CHANGED

@@ -1865,8 +1865,8 @@ def _convert_results_to_aoai_evaluation_results(
                 criteria_groups[criteria_name] = {}

             criteria_groups[criteria_name][metric_name] = value
-
-            input_key = key.replace("inputs.", "")
+        else:
+            input_key = key.replace("inputs.", "") if key.startswith("inputs.") else key
             if input_key not in input_groups:
                 input_groups[input_key] = value

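
The effect of this fix, re-created as a standalone sketch (a hypothetical helper name, not the SDK's code): the "inputs." prefix is now stripped only when the key actually starts with it, and the assignment runs in an else branch rather than unconditionally after the criteria handling.

# Standalone re-creation of the new key handling (helper name is illustrative).
def normalize_input_key(key: str) -> str:
    # Strip a true "inputs." prefix; leave every other key untouched. The old
    # str.replace() call also rewrote keys that merely contained the substring.
    return key.replace("inputs.", "") if key.startswith("inputs.") else key

assert normalize_input_key("inputs.query") == "query"
assert normalize_input_key("latency") == "latency"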

azure/ai/evaluation/_evaluators/_common/_base_eval.py
CHANGED

@@ -3,6 +3,7 @@
 # ---------------------------------------------------------

 import inspect
+import logging
 from abc import ABC, abstractmethod
 import json
 import copy

@@ -46,6 +47,8 @@ P = ParamSpec("P")
 T = TypeVar("T")
 T_EvalValue = TypeVar("T_EvalValue")

+logger = logging.getLogger(__name__)
+

 class DerivedEvalInput(TypedDict, total=False):
     """The eval input generated by EvaluatorBase._derive_conversation_starter."""

@@ -593,7 +596,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
         try:
             eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
         except Exception as e:
-
+            logger.error(f"Error converting kwargs to eval_input_list: {e}")
             raise e
         per_turn_results = []
         # Evaluate all inputs.

@@ -630,7 +633,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
             else:
                 result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
         except Exception as e:
-
+            logger.warning(f"Error calculating binary result: {e}")
         per_turn_results.append(result)

         # Return results as-is if only one result was produced.
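
Because the new logger is created via logging.getLogger(__name__), its messages follow the module path and can be surfaced with standard logging configuration; a minimal sketch:

# Sketch: opt in to the conversion/aggregation diagnostics added in this release.
import logging

logging.basicConfig()
# The new messages are emitted at ERROR and WARNING level under this logger name.
logging.getLogger("azure.ai.evaluation._evaluators._common._base_eval").setLevel(logging.WARNING)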

azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py
CHANGED

@@ -63,7 +63,7 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):

     _PROMPTY_FILE = "task_adherence.prompty"
     _RESULT_KEY = "task_adherence"
-    _OPTIONAL_PARAMS = []
+    _OPTIONAL_PARAMS = ["tool_definitions"]

     _DEFAULT_TASK_ADHERENCE_SCORE = 0

@@ -175,7 +175,7 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
                 if isinstance(content, list):
                     for item in content:
                         if isinstance(item, dict):
-                            if item.get("type") == "text":
+                            if item.get("type", None) in ("text", "input_text", "output_text"):
                                 assistant_parts.append(item.get("text", ""))
                             elif item.get("type") == "tool_call":
                                 tool_parts.append(str(item.get("tool_call", "")))
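
Listing tool_definitions in _OPTIONAL_PARAMS means the shared input validation no longer treats it as required, which is the fix the 1.13.5 changelog below describes. A hedged usage sketch (placeholder config and illustrative query/response values):

# Sketch: with tool_definitions optional again, query/response-only calls work.
from azure.ai.evaluation import TaskAdherenceEvaluator

model_config = {  # placeholder AzureOpenAIModelConfiguration values
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "api_key": "<your-api-key>",
    "azure_deployment": "<your-deployment>",
}
task_adherence = TaskAdherenceEvaluator(model_config)
result = task_adherence(
    query="Book a table for two at an Italian restaurant tonight.",
    response="I found a 7 PM opening at Trattoria Roma and booked it for you.",
)
print(result["task_adherence"])  # score keyed by _RESULT_KEY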

azure/ai/evaluation/_evaluators/{_tool_success → _tool_call_success}/__init__.py
RENAMED

@@ -2,6 +2,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

-from ._tool_success import _ToolSuccessEvaluator
+from ._tool_call_success import _ToolCallSuccessEvaluator

-__all__ = ["_ToolSuccessEvaluator"]
+__all__ = ["_ToolCallSuccessEvaluator"]

azure/ai/evaluation/_evaluators/{_tool_success/_tool_success.py → _tool_call_success/_tool_call_success.py}
RENAMED

@@ -6,7 +6,12 @@ import os
 import logging
 from typing import Dict, Union, List, Optional
 from typing_extensions import overload, override
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+from azure.ai.evaluation._exceptions import (
+    EvaluationException,
+    ErrorBlame,
+    ErrorCategory,
+    ErrorTarget,
+)
 from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase
 from azure.ai.evaluation._common._experimental import experimental

@@ -15,8 +20,8 @@ logger = logging.getLogger(__name__)


 @experimental
-class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
-    """The Tool Success evaluator determines whether tool calls done by an AI agent includes failures or not.
+class _ToolCallSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
+    """The Tool Call Success evaluator determines whether tool calls done by an AI agent includes failures or not.

     This evaluator focuses solely on tool call results and tool definitions, disregarding user's query to
     the agent, conversation history and agent's final response. Although tool definitions is optional,

@@ -36,34 +41,34 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):

     .. admonition:: Example:
         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator with a tool definitions and response.
+            :caption: Initialize and call a _ToolCallSuccessEvaluator with a tool definitions and response.

     .. admonition:: Example using Azure AI Project URL:

         .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator using Azure AI Project URL in the following
+            :caption: Initialize and call a _ToolCallSuccessEvaluator using Azure AI Project URL in the following
             format https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

     """

-    _PROMPTY_FILE = "tool_success.prompty"
-    _RESULT_KEY = "tool_success"
+    _PROMPTY_FILE = "tool_call_success.prompty"
+    _RESULT_KEY = "tool_call_success"
     _OPTIONAL_PARAMS = ["tool_definitions"]

-    id = "azureai://built-in/evaluators/tool_success"
+    id = "azureai://built-in/evaluators/tool_call_success"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     @override
     def __init__(self, model_config, *, credential=None, **kwargs):
-        """Initialize the Tool Success evaluator."""
+        """Initialize the Tool Call Success evaluator."""
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
         super().__init__(

@@ -86,7 +91,7 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         """Evaluate tool call success for a given response, and optionally tool definitions.

         Example with list of messages:
-            evaluator = _ToolSuccessEvaluator(model_config)
+            evaluator = _ToolCallSuccessEvaluator(model_config)
             response = [{'createdAt': 1700000070, 'run_id': '0', 'role': 'assistant',
                 'content': [{'type': 'text', 'text': '**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)...'}]}]

@@ -97,7 +102,7 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         :paramtype response: Union[str, List[dict]]
         :keyword tool_definitions: Optional tool definitions to use for evaluation.
         :paramtype tool_definitions: Union[dict, List[dict]]
-        :return: A dictionary with the Tool Success evaluation results.
+        :return: A dictionary with the Tool Call Success evaluation results.
         :rtype: Dict[str, Union[str, float]]
         """

@@ -116,7 +121,7 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):

     @override
     async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:  # type: ignore[override]
-        """Do Tool Success evaluation.
+        """Do Tool Call Success evaluation.

         :param eval_input: The input to the evaluator. Expected to contain whatever inputs are
             needed for the _flow method

@@ -126,19 +131,19 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         """
         if "response" not in eval_input:
             raise EvaluationException(
-                message="response is a required input to the Tool Success evaluator.",
-                internal_message="response is a required input to the Tool Success evaluator.",
+                message="response is a required input to the Tool Call Success evaluator.",
+                internal_message="response is a required input to the Tool Call Success evaluator.",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.MISSING_FIELD,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
             )
         if eval_input["response"] is None or eval_input["response"] == []:
             raise EvaluationException(
-                message="response cannot be None or empty for the Tool Success evaluator.",
-                internal_message="response cannot be None or empty for the Tool Success evaluator.",
+                message="response cannot be None or empty for the Tool Call Success evaluator.",
+                internal_message="response cannot be None or empty for the Tool Call Success evaluator.",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.INVALID_VALUE,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
             )

         eval_input["tool_calls"] = _reformat_tool_calls_results(eval_input["response"], logger)
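
Adapted from the docstring example above into a runnable sketch (placeholder config values; the response payload is illustrative, and this is experimental API subject to change):

# Sketch: evaluate whether an agent's tool calls succeeded.
from azure.ai.evaluation import _ToolCallSuccessEvaluator

model_config = {  # placeholder AzureOpenAIModelConfiguration values
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "api_key": "<your-api-key>",
    "azure_deployment": "<your-deployment>",
}
evaluator = _ToolCallSuccessEvaluator(model_config)
response = [
    {
        "createdAt": 1700000070,
        "run_id": "0",
        "role": "assistant",
        "content": [{"type": "text", "text": "**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)..."}],
    }
]
result = evaluator(response=response)
print(result["tool_call_success"])  # keyed by _RESULT_KEY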

azure/ai/evaluation/_exceptions.py
CHANGED

@@ -85,7 +85,7 @@ class ErrorTarget(Enum):
     SIMILARITY_EVALUATOR = "SimilarityEvaluator"
     FLUENCY_EVALUATOR = "FluencyEvaluator"
     RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
-    TOOL_SUCCESS_EVALUATOR = "_ToolSuccessEvaluator"
+    TOOL_CALL_SUCCESS_EVALUATOR = "_ToolCallSuccessEvaluator"
     TASK_ADHERENCE_EVALUATOR = "TaskAdherenceEvaluator"
     TASK_COMPLETION_EVALUATOR = "_TaskCompletionEvaluator"
     INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
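
Downstream code that filters evaluator errors by target can match the new enum member. A sketch, with the assumption (not shown in this diff) that EvaluationException exposes the target it was raised with:

# Sketch: handle validation errors raised by the renamed evaluator.
from azure.ai.evaluation import _ToolCallSuccessEvaluator
from azure.ai.evaluation._exceptions import EvaluationException, ErrorTarget

evaluator = _ToolCallSuccessEvaluator(model_config)  # model_config as in the earlier sketch
try:
    evaluator(response=[])  # an empty response raises, per the validation above
except EvaluationException as exc:
    # Assumption: the exception keeps the `target` passed at raise time.
    if getattr(exc, "target", None) == ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR:
        print("Tool Call Success evaluator rejected the input:", exc)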

{azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: azure-ai-evaluation
-Version: 1.13.3
+Version: 1.13.5
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation

@@ -418,6 +418,18 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

 # Release History

+## 1.13.5 (2025-11-10)
+
+### Bugs Fixed
+
+- **TaskAdherenceEvaluator:** treat tool definitions as optional so evaluations with only query/response inputs no longer raise “Either 'conversation' or individual inputs must be provided.”
+
+## 1.13.4 (2025-11-10)
+
+### Bugs Fixed
+
+- Handle input data for evaluation result when evaluators.
+
 ## 1.13.3 (2025-11-08)

 ### Other Changes

{azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/RECORD
RENAMED

@@ -1,12 +1,12 @@
-azure/ai/evaluation/__init__.py,sha256=
-azure/ai/evaluation/_constants.py,sha256=
-azure/ai/evaluation/_eval_mapping.py,sha256=
+azure/ai/evaluation/__init__.py,sha256=Qmb4OXIHXKtYH0ndXkH1onUYX9HUhqTXJiOAYwwnK9M,5522
+azure/ai/evaluation/_constants.py,sha256=6lWU_3Nse--HCBS8U7iZMYKlL7tLhtmL7wYYpnz564M,8039
+azure/ai/evaluation/_eval_mapping.py,sha256=awjtLDq0RtTrc5AvBBHWXj0mMyinMk_Ln2mkI1yvCPg,3335
 azure/ai/evaluation/_evaluator_definition.py,sha256=Z33dPXVAhMGEdn6kcE5d-p_v8VtE6Hpm6-PbNIqKh0Y,2901
-azure/ai/evaluation/_exceptions.py,sha256=
+azure/ai/evaluation/_exceptions.py,sha256=90L9njT2aFznT4DGo5zuksuMcZRvOYUoxJ2nk90NfPo,6283
 azure/ai/evaluation/_http_utils.py,sha256=d1McnMRT5lnaoR8x4r3pkfH2ic4T3JArclOK4kAaUmg,17261
 azure/ai/evaluation/_model_configurations.py,sha256=OZ-QskAbBX7lwjeyum6EOoh1oWSi7lplghnfMoFPpUU,5167
 azure/ai/evaluation/_user_agent.py,sha256=SgUm6acnwyoENu8KroyaWRrJroJNqLZBccpQoeKyrHw,1144
-azure/ai/evaluation/_version.py,sha256=
+azure/ai/evaluation/_version.py,sha256=UoLHgo6no_BDqgyiNVsfetHiZGXkGZGvSkj1U6I8FJ8,230
 azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/_aoai/__init__.py,sha256=0Ji05ShlsJaytvexXUpCe69t0jSNd3PpNbhr0zCkr6A,265
 azure/ai/evaluation/_aoai/aoai_grader.py,sha256=ey8YWNstlFq9SCu-kH3oFFsHCJcBeEabQAEAGENDy2c,6228

@@ -22,7 +22,7 @@ azure/ai/evaluation/_azure/_models.py,sha256=Vxcg7WfpAyxLQ-EesZzLGqopguV9Oohfjf-
 azure/ai/evaluation/_azure/_token_manager.py,sha256=EoNbDLweJOukqReOKJVgnQ9AFLVuQeK3KJrJtZKTIAA,5489
 azure/ai/evaluation/_common/__init__.py,sha256=5PO6eGs0zdarngR5shIsEn5gp2XfpUnLyt4HR79Hf5o,933
 azure/ai/evaluation/_common/_experimental.py,sha256=GVtSn9r1CeR_yEa578dJVNDJ3P24eqe8WYdH7llbiQY,5694
-azure/ai/evaluation/_common/constants.py,sha256=
+azure/ai/evaluation/_common/constants.py,sha256=AE5nhGKXZTwbq51m69HgdlCf5-ybyPGMf6tzlRxkPfE,6578
 azure/ai/evaluation/_common/evaluation_onedp_client.py,sha256=9QSK5lvMHtrlT3mrQwORZC-xjaS6suheYKwbtWKUjUM,7512
 azure/ai/evaluation/_common/math.py,sha256=d4bwWe35_RWDIZNcbV1BTBbHNx2QHQ4-I3EofDyyNE0,2863
 azure/ai/evaluation/_common/rai_service.py,sha256=tr5gd6j-SH7VeFRCzCPyZef0XV9xq9RqO5rrOlJ5Ztw,47970

@@ -99,7 +99,7 @@ azure/ai/evaluation/_converters/_models.py,sha256=x6GxLItQtvccv8q6jWtOUmQL1ZdeIA
 azure/ai/evaluation/_converters/_sk_services.py,sha256=NfjflVgeJUF0MrvAiUd_uF2magn38Q_MKmHzaY41vOA,18239
 azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
 azure/ai/evaluation/_evaluate/_eval_run.py,sha256=57rfW4MkE9LSlQNqdzxvq_nw8xYW-mqPQLw4WY_k-YU,22564
-azure/ai/evaluation/_evaluate/_evaluate.py,sha256=
+azure/ai/evaluation/_evaluate/_evaluate.py,sha256=XjZ2rYRPjqCxPr3Asl2vtQ4nXMbuxm8evUve8roM0z4,119950
 azure/ai/evaluation/_evaluate/_evaluate_aoai.py,sha256=ViMDSEvmVCiJ96ZSiuHv66d0EL4o8Qa_r6jkxwxmB-g,44030
 azure/ai/evaluation/_evaluate/_utils.py,sha256=teIRlQlEctgFYA8S8FedelFicBrt8g05OPVsE6bq8FI,19751
 azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=cPLi_MJ_pCp8eKBxJbiSoxgTnN3nDLuaP57dMkKuyhg,552

@@ -119,7 +119,7 @@ azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0U
 azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=r_21EFCX-2oAO6FxwUnBdUUwS-QgBiC0zGXSW3hXj2I,6335
 azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=ANvh9mDFW7KMejrgdWqBLjj4SIqEO5WW9gg5pE0RLJk,6798
 azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=xAymP_CZy4aPzWplMdXgQUQVDIUEMI-0nbgdm_umFYY,498
-azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=
+azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=1p9sLBfQDydc5okMK1VvU5agn52hkOlp_8-DkzHP59o,34094
 azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py,sha256=yYFpoCDe2wMFQck0ykbX8IJBBidk6NT1wUTkVFlVSy8,2728
 azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=M4gVGxd31QP6xA6U-rKDUC52MzexXOnP_9K2BcEAFPE,15903
 azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=JSZhRxVKljM4XE4P2DGrJSnD6iWr7tlDIJ8g95rHaGg,9078

@@ -171,7 +171,7 @@ azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTk
 azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=fw250aoDCQ-oXalmyUoYXxK2p43nxnn4sjVqTVSPvD8,5951
 azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
 azure/ai/evaluation/_evaluators/_task_adherence/__init__.py,sha256=9HtNrG7yYX0Ygq3cZoS_0obAvGgmy5HWcsBcPKoB15c,271
-azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py,sha256=
+azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py,sha256=31DzzKXtgQgoYPQ8OD1TAOZMlv3W7RXKtT0cdeXrCE0,12727
 azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty,sha256=cmrMjfqJXYtOPUcSCtGSiD8HUe4k53-n07kegjrH0Kg,7387
 azure/ai/evaluation/_evaluators/_task_completion/__init__.py,sha256=xW5ZKj8-ipSxORbLlSRAV01jHv8V9e0MpOhrBqnuMQY,276
 azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py,sha256=ASQyPJQn0OWOFmYJ_uFofSptePt7t9tC8sO3LkAV-Gs,9083

@@ -181,6 +181,9 @@ azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_eff
 azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py,sha256=vYB4Y_3n1LqTiEeZB1O1A0b14wpURBwtW0wPEN2FG9Q,288
 azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py,sha256=YhuaMKqP8U6XfnW5QruF5-68cTUAWOAmBSSAB54DYPs,14807
 azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty,sha256=pQhNQC1w12WNBU4sdgyhO9gXhZY1dWrNIBK31ciW-V4,10138
+azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py,sha256=pMYF75Q671a9OFQMBxiINqKHcW6GKf20_QGT594Cmd0,280
+azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py,sha256=0s4UmTyQFg6PPDPKOa-RycNyP8x7xhRrYBnqIvIH_1I,13952
+azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty,sha256=Ti7UWCIC6B3x6Er5rizLwhIvTLEXFt6EiuMIYB_hbmA,10105
 azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py,sha256=Iu1GBkR76fpMafxOZhG9bTAbVIaR0HciVwbuTeczgrc,293
 azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py,sha256=cPVw8bNERwQWwJtGYS9juomRvmOzCj6XO8Yx9syUDBk,11617
 azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty,sha256=JapLVUu_faMUO51Bo9uEy-VIn4sgh5n2uyASkeCnjSc,3028

@@ -190,9 +193,6 @@ azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization
 azure/ai/evaluation/_evaluators/_tool_selection/__init__.py,sha256=7nMGfFz8WgTg8s195-WHMNE-dhC3YN9P3xmmQQLWNEE,280
 azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py,sha256=5GLqGpKBx2Tw9TMK2ugimMH7cCuH3zuScsViTxNPp8I,11873
 azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty,sha256=NorcOvfmlzXqEaaWs_VcmdFpStVmHeJ0cNA8yEyRJlQ,7441
-azure/ai/evaluation/_evaluators/_tool_success/__init__.py,sha256=XezZXpi62CHZiDZJ41tBY0kWpB36XYcMh9YVsI6xBz8,267
-azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py,sha256=iWEcAF1dPALxCDrzV6lhN84jpDtHg00R7JO7ut0uTOw,13830
-azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty,sha256=innPK0WyTY7DWtA88Laj9JgRMmT-bQzfEq9Pq68gOSg,10100
 azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py,sha256=wGZBd_cRDgkuS-0HV9qm81dHK7ScYdKd98xLPtk6EwQ,118
 azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py,sha256=zDoc7SSaOSVlT-uCurAbsIOMzJ6HAuXk0SfYDrBE3p8,3736
 azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88

@@ -298,8 +298,8 @@ azure/ai/evaluation/simulator/_model_tools/models.py,sha256=SJYI-IJfCo9bitMGfSSn
 azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=2BzSqDDYilDushvR56vMRDmqFIaIYAewdUlUZg_elMg,2182
 azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=NE6lH4bfmibgMn4NgJtm9_l3PMoHSFrfjjosDJEKM0g,939
-azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
-azure_ai_evaluation-1.13.3.dist-info/METADATA,sha256=
-azure_ai_evaluation-1.13.3.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-azure_ai_evaluation-1.13.3.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
-azure_ai_evaluation-1.13.3.dist-info/RECORD,,
+azure_ai_evaluation-1.13.5.dist-info/licenses/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
+azure_ai_evaluation-1.13.5.dist-info/METADATA,sha256=k9CUcOdrbo27BI_G2LkIIJ2bbiOk6odKG327TcJEPEo,48938
+azure_ai_evaluation-1.13.5.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+azure_ai_evaluation-1.13.5.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
+azure_ai_evaluation-1.13.5.dist-info/RECORD,,
{azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL
RENAMED

File without changes

{azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/licenses/NOTICE.txt
RENAMED

File without changes

{azure_ai_evaluation-1.13.3.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt
RENAMED

File without changes