azure-ai-evaluation 1.13.3__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of azure-ai-evaluation has been flagged as potentially problematic.

@@ -33,6 +33,7 @@ from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
 from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
 from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
 from ._evaluators._tool_output_utilization import _ToolOutputUtilizationEvaluator
+from ._evaluators._tool_call_success import _ToolCallSuccessEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
@@ -133,6 +134,7 @@ __all__ = [
     "UngroundedAttributesEvaluator",
     "ToolCallAccuracyEvaluator",
     "_ToolOutputUtilizationEvaluator",
+    "_ToolCallSuccessEvaluator",
    "AzureOpenAIGrader",
    "AzureOpenAILabelGrader",
    "AzureOpenAIStringCheckGrader",
@@ -20,7 +20,7 @@ PROMPT_BASED_REASON_EVALUATORS = [
     "tool_output_utilization",
     "task_completion",
     "tool_input_accuracy",
-    "tool_success",
+    "tool_call_success",
     "tool_call_accuracy",
 ]

@@ -143,7 +143,6 @@ class _EvaluatorMetricMapping:
         "hate_unfairness": ["hate_unfairness"],
         "tool_input_accuracy": ["tool_input_accuracy"],
         "task_completion": ["task_completion"],
-        "tool_success": ["tool_success"],
         "tool_call_success": ["tool_call_success"],
         "tool_selection": ["tool_selection"],
         "tool_output_utilization": ["tool_output_utilization"],
@@ -12,10 +12,14 @@
 # Import all evals
 from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
 from azure.ai.evaluation._evaluators._task_completion import _TaskCompletionEvaluator
-from azure.ai.evaluation._evaluators._tool_input_accuracy import _ToolInputAccuracyEvaluator
+from azure.ai.evaluation._evaluators._tool_input_accuracy import (
+    _ToolInputAccuracyEvaluator,
+)
 from azure.ai.evaluation._evaluators._tool_selection import _ToolSelectionEvaluator
-from azure.ai.evaluation._evaluators._tool_success import _ToolSuccessEvaluator
-from azure.ai.evaluation._evaluators._task_navigation_efficiency import _TaskNavigationEfficiencyEvaluator
+from azure.ai.evaluation._evaluators._tool_call_success import _ToolCallSuccessEvaluator
+from azure.ai.evaluation._evaluators._task_navigation_efficiency import (
+    _TaskNavigationEfficiencyEvaluator,
+)
 from azure.ai.evaluation import (
     BleuScoreEvaluator,
     CodeVulnerabilityEvaluator,
@@ -77,7 +81,7 @@ EVAL_CLASS_MAP = {
     ToolCallAccuracyEvaluator: "tool_call_accuracy",
     _ToolInputAccuracyEvaluator: "tool_input_accuracy",
     _ToolSelectionEvaluator: "tool_selection",
-    _ToolSuccessEvaluator: "tool_success",
+    _ToolCallSuccessEvaluator: "tool_call_success",
     UngroundedAttributesEvaluator: "ungrounded_attributes",
     ViolenceEvaluator: "violence",
 }
@@ -1865,8 +1865,8 @@ def _convert_results_to_aoai_evaluation_results(
                 criteria_groups[criteria_name] = {}

             criteria_groups[criteria_name][metric_name] = value
-        elif key.startswith("inputs."):
-            input_key = key.replace("inputs.", "")
+        else:
+            input_key = key.replace("inputs.", "") if key.startswith("inputs.") else key
             if input_key not in input_groups:
                 input_groups[input_key] = value

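This is the 1.13.4 fix called out in the release notes below: previously, result keys without the "inputs." prefix fell through the elif and their columns were silently dropped. A simplified, self-contained sketch of the new key handling (hypothetical helper, not the SDK function):

    def to_input_key(key: str) -> str:
        # Mirrors the new else-branch: strip the "inputs." prefix when present,
        # otherwise pass the key through unchanged instead of dropping the column.
        return key.replace("inputs.", "") if key.startswith("inputs.") else key

    assert to_input_key("inputs.query") == "query"
    assert to_input_key("query") == "query"  # previously this column was skipped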
@@ -3,6 +3,7 @@
 # ---------------------------------------------------------

 import inspect
+import logging
 from abc import ABC, abstractmethod
 import json
 import copy
@@ -46,6 +47,8 @@ P = ParamSpec("P")
 T = TypeVar("T")
 T_EvalValue = TypeVar("T_EvalValue")

+logger = logging.getLogger(__name__)
+

 class DerivedEvalInput(TypedDict, total=False):
     """The eval input generated by EvaluatorBase._derive_conversation_starter."""
@@ -593,7 +596,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
         try:
             eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
         except Exception as e:
-            print(f"Error converting kwargs to eval_input_list: {e}")
+            logger.error(f"Error converting kwargs to eval_input_list: {e}")
             raise e
         per_turn_results = []
         # Evaluate all inputs.
@@ -630,7 +633,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
                 else:
                     result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
             except Exception as e:
-                print(f"Error calculating binary result: {e}")
+                logger.warning(f"Error calculating binary result: {e}")
             per_turn_results.append(result)
         # Return results as-is if only one result was produced.

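The pattern in isolation: a module-level logger replaces bare print calls, so host applications can route or silence these diagnostics through standard logging configuration instead of losing them to stdout. A minimal sketch, with `convert` standing in for the SDK-internal call:

    import logging

    logger = logging.getLogger(__name__)

    def run(convert, **kwargs):
        try:
            return convert(**kwargs)
        except Exception as e:
            # Matches the new behavior: log at error level, then re-raise.
            logger.error(f"Error converting kwargs to eval_input_list: {e}")
            raise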
@@ -63,7 +63,7 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):

    _PROMPTY_FILE = "task_adherence.prompty"
    _RESULT_KEY = "task_adherence"
-    _OPTIONAL_PARAMS = []
+    _OPTIONAL_PARAMS = ["tool_definitions"]

    _DEFAULT_TASK_ADHERENCE_SCORE = 0

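With tool_definitions registered as optional, a query/response-only invocation should no longer trip the "Either 'conversation' or individual inputs must be provided" check (the 1.13.5 fix in the release notes below). A hedged sketch; the model_config values are placeholders:

    from azure.ai.evaluation import TaskAdherenceEvaluator

    model_config = {
        "azure_endpoint": "https://<resource>.openai.azure.com",  # placeholder
        "azure_deployment": "<deployment>",  # placeholder
    }
    evaluator = TaskAdherenceEvaluator(model_config)
    result = evaluator(
        query="Book a table for two at 7 pm.",
        response="Done - I reserved a table for two at 7 pm.",
    )  # tool_definitions intentionally omitted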
@@ -175,7 +175,7 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         if isinstance(content, list):
             for item in content:
                 if isinstance(item, dict):
-                    if item.get("type") == "text":
+                    if item.get("type", None) in ("text", "input_text", "output_text"):
                         assistant_parts.append(item.get("text", ""))
                     elif item.get("type") == "tool_call":
                         tool_parts.append(str(item.get("tool_call", "")))
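The widened check now also collects Responses-API style "input_text"/"output_text" items when flattening message content, not just plain "text". A standalone illustration of the new predicate:

    content = [
        {"type": "output_text", "text": "Day 1: visit the Louvre."},
        {"type": "tool_call", "tool_call": {"name": "search"}},
    ]
    assistant_parts = [
        item.get("text", "")
        for item in content
        if isinstance(item, dict)
        and item.get("type", None) in ("text", "input_text", "output_text")
    ]
    print(assistant_parts)  # ['Day 1: visit the Louvre.']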
@@ -2,6 +2,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

-from ._tool_success import _ToolSuccessEvaluator
+from ._tool_call_success import _ToolCallSuccessEvaluator

-__all__ = ["_ToolSuccessEvaluator"]
+__all__ = ["_ToolCallSuccessEvaluator"]
@@ -6,7 +6,12 @@ import os
 import logging
 from typing import Dict, Union, List, Optional
 from typing_extensions import overload, override
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+from azure.ai.evaluation._exceptions import (
+    EvaluationException,
+    ErrorBlame,
+    ErrorCategory,
+    ErrorTarget,
+)
 from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase
 from azure.ai.evaluation._common._experimental import experimental

@@ -15,8 +20,8 @@ logger = logging.getLogger(__name__)


 @experimental
-class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
-    """The Tool Success evaluator determines whether tool calls done by an AI agent includes failures or not.
+class _ToolCallSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
+    """The Tool Call Success evaluator determines whether tool calls done by an AI agent includes failures or not.

     This evaluator focuses solely on tool call results and tool definitions, disregarding user's query to
     the agent, conversation history and agent's final response. Although tool definitions is optional,
@@ -36,34 +41,34 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):

     .. admonition:: Example:
         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator with a tool definitions and response.
+            :caption: Initialize and call a _ToolCallSuccessEvaluator with a tool definitions and response.

     .. admonition:: Example using Azure AI Project URL:

         .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator using Azure AI Project URL in the following
+            :caption: Initialize and call a _ToolCallSuccessEvaluator using Azure AI Project URL in the following
             format https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

     """

-    _PROMPTY_FILE = "tool_success.prompty"
-    _RESULT_KEY = "tool_success"
+    _PROMPTY_FILE = "tool_call_success.prompty"
+    _RESULT_KEY = "tool_call_success"
     _OPTIONAL_PARAMS = ["tool_definitions"]

-    id = "azureai://built-in/evaluators/tool_success"
+    id = "azureai://built-in/evaluators/tool_call_success"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     @override
     def __init__(self, model_config, *, credential=None, **kwargs):
-        """Initialize the Tool Success evaluator."""
+        """Initialize the Tool Call Success evaluator."""
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
         super().__init__(
@@ -86,7 +91,7 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         """Evaluate tool call success for a given response, and optionally tool definitions.

         Example with list of messages:
-            evaluator = _ToolSuccessEvaluator(model_config)
+            evaluator = _ToolCallSuccessEvaluator(model_config)
             response = [{'createdAt': 1700000070, 'run_id': '0', 'role': 'assistant',
             'content': [{'type': 'text', 'text': '**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)...'}]}]

@@ -97,7 +102,7 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         :paramtype response: Union[str, List[dict]]
         :keyword tool_definitions: Optional tool definitions to use for evaluation.
         :paramtype tool_definitions: Union[dict, List[dict]]
-        :return: A dictionary with the tool success evaluation results.
+        :return: A dictionary with the Tool Call Success evaluation results.
         :rtype: Dict[str, Union[str, float]]
         """

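A hedged end-to-end sketch of the renamed evaluator, following the keyword signature documented above (response required, tool_definitions optional). The model_config values and sample messages are placeholders, and the reason-key name is inferred from PROMPT_BASED_REASON_EVALUATORS, not confirmed by this diff:

    from azure.ai.evaluation._evaluators._tool_call_success import (
        _ToolCallSuccessEvaluator,
    )

    model_config = {
        "azure_endpoint": "https://<resource>.openai.azure.com",  # placeholder
        "azure_deployment": "<deployment>",  # placeholder
    }
    evaluator = _ToolCallSuccessEvaluator(model_config)
    result = evaluator(
        response=[{
            "role": "assistant",
            "content": [{"type": "tool_call",
                         "tool_call": {"name": "fetch_weather",
                                       "arguments": {"city": "Paris"}}}],
        }],
        tool_definitions=[{"name": "fetch_weather",
                           "description": "Get a forecast for a city."}],
    )
    print(result["tool_call_success"])             # score keyed by _RESULT_KEY
    print(result.get("tool_call_success_reason"))  # reason key name assumed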
@@ -116,7 +121,7 @@
         @override
         async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:  # type: ignore[override]
-        """Do Tool Success evaluation.
+        """Do Tool Call Success evaluation.

         :param eval_input: The input to the evaluator. Expected to contain whatever inputs are
             needed for the _flow method
@@ -126,19 +131,19 @@
         """
         if "response" not in eval_input:
             raise EvaluationException(
-                message="response is a required input to the Tool Success evaluator.",
-                internal_message="response is a required input to the Tool Success evaluator.",
+                message="response is a required input to the Tool Call Success evaluator.",
+                internal_message="response is a required input to the Tool Call Success evaluator.",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.MISSING_FIELD,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
             )
         if eval_input["response"] is None or eval_input["response"] == []:
             raise EvaluationException(
-                message="response cannot be None or empty for the Tool Success evaluator.",
-                internal_message="response cannot be None or empty for the Tool Success evaluator.",
+                message="response cannot be None or empty for the Tool Call Success evaluator.",
+                internal_message="response cannot be None or empty for the Tool Call Success evaluator.",
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.INVALID_VALUE,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
             )

         eval_input["tool_calls"] = _reformat_tool_calls_results(eval_input["response"], logger)
@@ -1,5 +1,5 @@
 ---
-name: Tool Success
+name: Tool Call Success
 description: Evaluates whether a Tool call was successful or resulted in a technical error
 model:
   api: chat
@@ -85,7 +85,7 @@ class ErrorTarget(Enum):
     SIMILARITY_EVALUATOR = "SimilarityEvaluator"
     FLUENCY_EVALUATOR = "FluencyEvaluator"
     RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
-    TOOL_SUCCESS_EVALUATOR = "_ToolSuccessEvaluator"
+    TOOL_CALL_SUCCESS_EVALUATOR = "_ToolCallSuccessEvaluator"
     TASK_ADHERENCE_EVALUATOR = "TaskAdherenceEvaluator"
     TASK_COMPLETION_EVALUATOR = "_TaskCompletionEvaluator"
     INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
@@ -3,4 +3,4 @@
 # ---------------------------------------------------------
 # represents upcoming version

-VERSION = "1.13.3"
+VERSION = "1.13.5"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: azure-ai-evaluation
-Version: 1.13.3
+Version: 1.13.5
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -418,6 +418,18 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

 # Release History

+## 1.13.5 (2025-11-10)
+
+### Bugs Fixed
+
+- **TaskAdherenceEvaluator:** treat tool definitions as optional so evaluations with only query/response inputs no longer raise “Either 'conversation' or individual inputs must be provided.”
+
+## 1.13.4 (2025-11-10)
+
+### Bugs Fixed
+
+- Handle input data for evaluation results when evaluator output keys are missing the `inputs.` prefix.
+
 ## 1.13.3 (2025-11-08)

 ### Other Changes
@@ -1,12 +1,12 @@
-azure/ai/evaluation/__init__.py,sha256=I0gt0HLyxnqtTIoj5pQgj1CtmzXvuEZb_sXuVNPGtzw,5419
-azure/ai/evaluation/_constants.py,sha256=g8yzM6YeIhJoC4ZIqHzyakjMMJ7cXHF5wUj1gvz9D6I,8081
-azure/ai/evaluation/_eval_mapping.py,sha256=szSJ4HSpct_JsoUX3svYIioW8cSqsKUVKLCshrcpSBE,3299
+azure/ai/evaluation/__init__.py,sha256=Qmb4OXIHXKtYH0ndXkH1onUYX9HUhqTXJiOAYwwnK9M,5522
+azure/ai/evaluation/_constants.py,sha256=6lWU_3Nse--HCBS8U7iZMYKlL7tLhtmL7wYYpnz564M,8039
+azure/ai/evaluation/_eval_mapping.py,sha256=awjtLDq0RtTrc5AvBBHWXj0mMyinMk_Ln2mkI1yvCPg,3335
 azure/ai/evaluation/_evaluator_definition.py,sha256=Z33dPXVAhMGEdn6kcE5d-p_v8VtE6Hpm6-PbNIqKh0Y,2901
-azure/ai/evaluation/_exceptions.py,sha256=AsbYxt8G8CpET2bbJzvtYJAw1bbNSI1T77k65ayuHNc,6274
+azure/ai/evaluation/_exceptions.py,sha256=90L9njT2aFznT4DGo5zuksuMcZRvOYUoxJ2nk90NfPo,6283
 azure/ai/evaluation/_http_utils.py,sha256=d1McnMRT5lnaoR8x4r3pkfH2ic4T3JArclOK4kAaUmg,17261
 azure/ai/evaluation/_model_configurations.py,sha256=OZ-QskAbBX7lwjeyum6EOoh1oWSi7lplghnfMoFPpUU,5167
 azure/ai/evaluation/_user_agent.py,sha256=SgUm6acnwyoENu8KroyaWRrJroJNqLZBccpQoeKyrHw,1144
-azure/ai/evaluation/_version.py,sha256=BliTNWOypBtp-bZgH9-o0e6Rtu7xf-1V46JsNOQJ06c,230
+azure/ai/evaluation/_version.py,sha256=UoLHgo6no_BDqgyiNVsfetHiZGXkGZGvSkj1U6I8FJ8,230
 azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/_aoai/__init__.py,sha256=0Ji05ShlsJaytvexXUpCe69t0jSNd3PpNbhr0zCkr6A,265
 azure/ai/evaluation/_aoai/aoai_grader.py,sha256=ey8YWNstlFq9SCu-kH3oFFsHCJcBeEabQAEAGENDy2c,6228
@@ -22,7 +22,7 @@ azure/ai/evaluation/_azure/_models.py,sha256=Vxcg7WfpAyxLQ-EesZzLGqopguV9Oohfjf-
 azure/ai/evaluation/_azure/_token_manager.py,sha256=EoNbDLweJOukqReOKJVgnQ9AFLVuQeK3KJrJtZKTIAA,5489
 azure/ai/evaluation/_common/__init__.py,sha256=5PO6eGs0zdarngR5shIsEn5gp2XfpUnLyt4HR79Hf5o,933
 azure/ai/evaluation/_common/_experimental.py,sha256=GVtSn9r1CeR_yEa578dJVNDJ3P24eqe8WYdH7llbiQY,5694
-azure/ai/evaluation/_common/constants.py,sha256=BI3y7AjMs0gvl2J2FbreUpfFgwJ93zoYLzxmC5Uq-0s,6573
+azure/ai/evaluation/_common/constants.py,sha256=AE5nhGKXZTwbq51m69HgdlCf5-ybyPGMf6tzlRxkPfE,6578
 azure/ai/evaluation/_common/evaluation_onedp_client.py,sha256=9QSK5lvMHtrlT3mrQwORZC-xjaS6suheYKwbtWKUjUM,7512
 azure/ai/evaluation/_common/math.py,sha256=d4bwWe35_RWDIZNcbV1BTBbHNx2QHQ4-I3EofDyyNE0,2863
 azure/ai/evaluation/_common/rai_service.py,sha256=tr5gd6j-SH7VeFRCzCPyZef0XV9xq9RqO5rrOlJ5Ztw,47970
@@ -99,7 +99,7 @@ azure/ai/evaluation/_converters/_models.py,sha256=x6GxLItQtvccv8q6jWtOUmQL1ZdeIA
 azure/ai/evaluation/_converters/_sk_services.py,sha256=NfjflVgeJUF0MrvAiUd_uF2magn38Q_MKmHzaY41vOA,18239
 azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
 azure/ai/evaluation/_evaluate/_eval_run.py,sha256=57rfW4MkE9LSlQNqdzxvq_nw8xYW-mqPQLw4WY_k-YU,22564
-azure/ai/evaluation/_evaluate/_evaluate.py,sha256=Wa7U3XCclwV2uEz6-8OprwewF51RXn2jc6SXmX_jrjI,119938
+azure/ai/evaluation/_evaluate/_evaluate.py,sha256=XjZ2rYRPjqCxPr3Asl2vtQ4nXMbuxm8evUve8roM0z4,119950
 azure/ai/evaluation/_evaluate/_evaluate_aoai.py,sha256=ViMDSEvmVCiJ96ZSiuHv66d0EL4o8Qa_r6jkxwxmB-g,44030
 azure/ai/evaluation/_evaluate/_utils.py,sha256=teIRlQlEctgFYA8S8FedelFicBrt8g05OPVsE6bq8FI,19751
 azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=cPLi_MJ_pCp8eKBxJbiSoxgTnN3nDLuaP57dMkKuyhg,552
@@ -119,7 +119,7 @@ azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0U
 azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=r_21EFCX-2oAO6FxwUnBdUUwS-QgBiC0zGXSW3hXj2I,6335
 azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=ANvh9mDFW7KMejrgdWqBLjj4SIqEO5WW9gg5pE0RLJk,6798
 azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=xAymP_CZy4aPzWplMdXgQUQVDIUEMI-0nbgdm_umFYY,498
-azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=qB0JmaxITudedxgIHTK6wGJsCHnaul41n6Jm-QGPaaU,34025
+azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=1p9sLBfQDydc5okMK1VvU5agn52hkOlp_8-DkzHP59o,34094
 azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py,sha256=yYFpoCDe2wMFQck0ykbX8IJBBidk6NT1wUTkVFlVSy8,2728
 azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=M4gVGxd31QP6xA6U-rKDUC52MzexXOnP_9K2BcEAFPE,15903
 azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=JSZhRxVKljM4XE4P2DGrJSnD6iWr7tlDIJ8g95rHaGg,9078
@@ -171,7 +171,7 @@ azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTk
 azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=fw250aoDCQ-oXalmyUoYXxK2p43nxnn4sjVqTVSPvD8,5951
 azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
 azure/ai/evaluation/_evaluators/_task_adherence/__init__.py,sha256=9HtNrG7yYX0Ygq3cZoS_0obAvGgmy5HWcsBcPKoB15c,271
-azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py,sha256=e5Tnkk-t_VpZWZvVdGkcE-iPBnljug3Y-Qou9EmgK8k,12672
+azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py,sha256=31DzzKXtgQgoYPQ8OD1TAOZMlv3W7RXKtT0cdeXrCE0,12727
 azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty,sha256=cmrMjfqJXYtOPUcSCtGSiD8HUe4k53-n07kegjrH0Kg,7387
 azure/ai/evaluation/_evaluators/_task_completion/__init__.py,sha256=xW5ZKj8-ipSxORbLlSRAV01jHv8V9e0MpOhrBqnuMQY,276
 azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py,sha256=ASQyPJQn0OWOFmYJ_uFofSptePt7t9tC8sO3LkAV-Gs,9083
@@ -181,6 +181,9 @@ azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_eff
 azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py,sha256=vYB4Y_3n1LqTiEeZB1O1A0b14wpURBwtW0wPEN2FG9Q,288
 azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py,sha256=YhuaMKqP8U6XfnW5QruF5-68cTUAWOAmBSSAB54DYPs,14807
 azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty,sha256=pQhNQC1w12WNBU4sdgyhO9gXhZY1dWrNIBK31ciW-V4,10138
+azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py,sha256=pMYF75Q671a9OFQMBxiINqKHcW6GKf20_QGT594Cmd0,280
+azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py,sha256=0s4UmTyQFg6PPDPKOa-RycNyP8x7xhRrYBnqIvIH_1I,13952
+azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty,sha256=Ti7UWCIC6B3x6Er5rizLwhIvTLEXFt6EiuMIYB_hbmA,10105
 azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py,sha256=Iu1GBkR76fpMafxOZhG9bTAbVIaR0HciVwbuTeczgrc,293
 azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py,sha256=cPVw8bNERwQWwJtGYS9juomRvmOzCj6XO8Yx9syUDBk,11617
 azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty,sha256=JapLVUu_faMUO51Bo9uEy-VIn4sgh5n2uyASkeCnjSc,3028
@@ -190,9 +193,6 @@ azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization
 azure/ai/evaluation/_evaluators/_tool_selection/__init__.py,sha256=7nMGfFz8WgTg8s195-WHMNE-dhC3YN9P3xmmQQLWNEE,280
 azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py,sha256=5GLqGpKBx2Tw9TMK2ugimMH7cCuH3zuScsViTxNPp8I,11873
 azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty,sha256=NorcOvfmlzXqEaaWs_VcmdFpStVmHeJ0cNA8yEyRJlQ,7441
-azure/ai/evaluation/_evaluators/_tool_success/__init__.py,sha256=XezZXpi62CHZiDZJ41tBY0kWpB36XYcMh9YVsI6xBz8,267
-azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py,sha256=iWEcAF1dPALxCDrzV6lhN84jpDtHg00R7JO7ut0uTOw,13830
-azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty,sha256=innPK0WyTY7DWtA88Laj9JgRMmT-bQzfEq9Pq68gOSg,10100
 azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py,sha256=wGZBd_cRDgkuS-0HV9qm81dHK7ScYdKd98xLPtk6EwQ,118
 azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py,sha256=zDoc7SSaOSVlT-uCurAbsIOMzJ6HAuXk0SfYDrBE3p8,3736
 azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
@@ -298,8 +298,8 @@ azure/ai/evaluation/simulator/_model_tools/models.py,sha256=SJYI-IJfCo9bitMGfSSn
 azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=2BzSqDDYilDushvR56vMRDmqFIaIYAewdUlUZg_elMg,2182
 azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=NE6lH4bfmibgMn4NgJtm9_l3PMoHSFrfjjosDJEKM0g,939
-azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
-azure_ai_evaluation-1.13.3.dist-info/METADATA,sha256=CqxRu1pcHp1Y910sXdN_9wnMcsKyOexJkhGj8zZAlw8,48602
-azure_ai_evaluation-1.13.3.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-azure_ai_evaluation-1.13.3.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
-azure_ai_evaluation-1.13.3.dist-info/RECORD,,
+azure_ai_evaluation-1.13.5.dist-info/licenses/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
+azure_ai_evaluation-1.13.5.dist-info/METADATA,sha256=k9CUcOdrbo27BI_G2LkIIJ2bbiOk6odKG327TcJEPEo,48938
+azure_ai_evaluation-1.13.5.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+azure_ai_evaluation-1.13.5.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
+azure_ai_evaluation-1.13.5.dist-info/RECORD,,