azure-ai-evaluation 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/_aoai/aoai_grader.py +63 -19
- azure/ai/evaluation/_aoai/label_grader.py +8 -3
- azure/ai/evaluation/_aoai/python_grader.py +8 -3
- azure/ai/evaluation/_aoai/score_model_grader.py +8 -3
- azure/ai/evaluation/_aoai/string_check_grader.py +9 -4
- azure/ai/evaluation/_aoai/text_similarity_grader.py +9 -4
- azure/ai/evaluation/_eval_mapping.py +2 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +106 -4
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +279 -50
- azure/ai/evaluation/_evaluate/_utils.py +7 -3
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +7 -1
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +77 -3
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +7 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +6 -0
- azure/ai/evaluation/_evaluators/_path_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_path_efficiency/_path_efficiency.py +342 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +7 -1
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +7 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +7 -1
- azure/ai/evaluation/_evaluators/_task_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_success/_task_success.py +168 -0
- azure/ai/evaluation/_evaluators/_task_success/task_success.prompty +220 -0
- azure/ai/evaluation/_exceptions.py +1 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +7 -2
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_mlflow_integration.py +454 -35
- azure/ai/evaluation/red_team/_red_team.py +9 -0
- azure/ai/evaluation/red_team/_red_team_result.py +230 -1
- azure/ai/evaluation/red_team/_result_processor.py +416 -23
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +1 -1
- {azure_ai_evaluation-1.11.0.dist-info → azure_ai_evaluation-1.12.0.dist-info}/METADATA +19 -3
- {azure_ai_evaluation-1.11.0.dist-info → azure_ai_evaluation-1.12.0.dist-info}/RECORD +35 -30
- {azure_ai_evaluation-1.11.0.dist-info → azure_ai_evaluation-1.12.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.11.0.dist-info → azure_ai_evaluation-1.12.0.dist-info}/licenses/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.11.0.dist-info → azure_ai_evaluation-1.12.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluate/_evaluate_aoai.py

@@ -3,10 +3,11 @@
 # ---------------------------------------------------------
 
 import logging
+import re
 
 from openai import AzureOpenAI, OpenAI
 import pandas as pd
-from typing import Any, Callable, Dict, Tuple, TypeVar, Union, Type, Optional, TypedDict, List
+from typing import Any, Callable, Dict, Tuple, TypeVar, Union, Type, Optional, TypedDict, List, cast, Set
 from time import sleep
 
 from ._batch_run import CodeClient, ProxyClient
@@ -21,6 +22,15 @@ from azure.ai.evaluation._common._experimental import experimental
 TClient = TypeVar("TClient", ProxyClient, CodeClient)
 LOGGER = logging.getLogger(__name__)
 
+# Precompiled regex for extracting data paths from mapping expressions of the form
+# ${data.some.dotted.path}. Compiled once at import time to avoid repeated
+# recompilation on each call to _generate_data_source_config.
+DATA_PATH_PATTERN = re.compile(r"^\$\{data\.([a-zA-Z0-9_\.]+)\}$")
+
+# Canonical top-level wrapper key expected in nested JSONL evaluation rows.
+# Centralizing here avoids magic strings sprinkled through schema/content generation code.
+WRAPPER_KEY = "item"
+
 
 class OAIEvalRunCreationInfo(TypedDict, total=True):
     """Configuration for an evaluator"""
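Note: the pattern above anchors on the whole mapping expression, so only pure `${data.<dotted.path>}` references count as data paths; `${run.outputs.*}` references do not match and are handled separately. An illustrative standalone sketch of the matching behavior (the pattern is re-declared here so the snippet runs on its own):

    import re

    DATA_PATH_PATTERN = re.compile(r"^\$\{data\.([a-zA-Z0-9_\.]+)\}$")

    assert DATA_PATH_PATTERN.match("${data.item.query}").group(1) == "item.query"
    assert DATA_PATH_PATTERN.match("${run.outputs.response}") is None  # run outputs are not data paths
    assert DATA_PATH_PATTERN.match("${data.item.query} extra") is None  # must match the full string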
@@ -102,7 +112,7 @@ def _begin_aoai_evaluation(
 
 
 def _begin_single_aoai_evaluation(
-    graders: Dict[str, AzureOpenAIGrader], data: pd.DataFrame, column_mapping: Dict[str, str], run_name: str
+    graders: Dict[str, AzureOpenAIGrader], data: pd.DataFrame, column_mapping: Optional[Dict[str, str]], run_name: str
 ) -> OAIEvalRunCreationInfo:
     """
     Use the AOAI SDK to start an evaluation of the inputted dataset against the supplied graders.
@@ -111,8 +121,10 @@ def _begin_single_aoai_evaluation(
 
     :param graders: The graders to use for the evaluation. Should be a dictionary of string to AOAIGrader.
     :type graders: Dict[str, AoaiGrader]
-    :param
-    :type
+    :param data: The input data to evaluate, as a pandas DataFrame.
+    :type data: pd.DataFrame
+    :param column_mapping: The column mapping to apply. If None, an empty mapping is used.
+    :type column_mapping: Optional[Dict[str, str]]
     :param run_name: The name of the evaluation run.
     :type run_name: str
     :return: A tuple containing the eval group ID and eval run ID of the resultant eval run, as well as a dictionary
@@ -130,7 +142,8 @@ def _begin_single_aoai_evaluation(
     for name, grader in graders.items():
         grader_name_list.append(name)
         grader_list.append(grader._grader_config)
-
+    effective_column_mapping: Dict[str, str] = column_mapping or {}
+    data_source_config = _generate_data_source_config(data, effective_column_mapping)
 
     # Create eval group
     # import pdb; pdb.set_trace()
@@ -154,7 +167,7 @@ def _begin_single_aoai_evaluation(
         grader_name_map[criteria.id] = name
 
     # Create eval run
-    eval_run_id = _begin_eval_run(client, eval_group_info.id, run_name, data,
+    eval_run_id = _begin_eval_run(client, eval_group_info.id, run_name, data, effective_column_mapping)
     LOGGER.info(
         f"AOAI: Eval run created with id {eval_run_id}."
         + " Results will be retrieved after normal evaluation is complete..."
@@ -272,8 +285,33 @@ def _get_single_run_results(
     for row_result in all_results:
         listed_results["index"].append(row_result.datasource_item_id)
         for single_grader_row_result in row_result.results:
-
-
+            if isinstance(single_grader_row_result, dict):
+                result_dict = single_grader_row_result
+            elif hasattr(single_grader_row_result, "model_dump"):
+                result_dict = single_grader_row_result.model_dump()
+            elif hasattr(single_grader_row_result, "dict"):
+                result_dict = single_grader_row_result.dict()
+            elif hasattr(single_grader_row_result, "__dict__"):
+                result_dict = vars(single_grader_row_result)
+            else:
+                raise EvaluationException(
+                    message=("Unsupported AOAI evaluation result type: " f"{type(single_grader_row_result)!r}."),
+                    blame=ErrorBlame.UNKNOWN,
+                    category=ErrorCategory.FAILED_EXECUTION,
+                    target=ErrorTarget.AOAI_GRADER,
+                )
+
+            grader_result_name = result_dict.get("name", None)
+            if grader_result_name is None:
+                raise EvaluationException(
+                    message="AOAI evaluation response missing grader result name; unable to map to original grader.",
+                    blame=ErrorBlame.UNKNOWN,
+                    category=ErrorCategory.FAILED_EXECUTION,
+                    target=ErrorTarget.AOAI_GRADER,
+                )
+
+            grader_name = run_info["grader_name_map"][grader_result_name]
+            for name, value in result_dict.items():
                 if name in ["name"]:
                     continue
                 if name.lower() == "passed":
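Note: the added branch normalizes each grader row result to a plain dict before reading fields, covering raw dicts, Pydantic v2 objects (`model_dump`), Pydantic v1 objects (`.dict()`), and generic objects (`__dict__`). A minimal standalone sketch of the same fallback chain, using a hypothetical result class rather than the SDK's:

    from dataclasses import dataclass

    @dataclass
    class FakeRowResult:  # hypothetical stand-in for an SDK result object
        name: str
        passed: bool

    def to_result_dict(row):
        if isinstance(row, dict):
            return row
        if hasattr(row, "model_dump"):  # Pydantic v2
            return row.model_dump()
        if hasattr(row, "dict"):        # Pydantic v1
            return row.dict()
        if hasattr(row, "__dict__"):    # plain objects
            return vars(row)
        raise TypeError(f"Unsupported result type: {type(row)!r}")

    print(to_result_dict(FakeRowResult(name="labeler", passed=True)))
    # -> {'name': 'labeler', 'passed': True}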
@@ -454,36 +492,166 @@ def _get_graders_and_column_mappings(
     ]
 
 
+def _build_schema_tree_from_paths(
+    paths: List[str],
+    force_leaf_type: str = "string",
+) -> Dict[str, Any]:
+    """
+    Build a nested JSON schema (object) from a list of dot-delimited paths.
+    Each path represents a leaf. Intermediate segments become nested object properties.
+
+    Example input paths:
+        ["item.query",
+         "item.context.company.policy.security.passwords.rotation_days",
+         "item.context.company.policy.security.network.vpn.required"]
+
+    Returns schema fragment:
+    {
+        "type": "object",
+        "properties": {
+            "item": {
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string"},
+                    "context": {
+                        "type": "object",
+                        "properties": {
+                            "company": { ... }
+                        },
+                        "required": ["company"]
+                    }
+                },
+                "required": ["query", "context"]
+            }
+        },
+        "required": ["item"]
+    }
+
+    :param paths: A list of dot-delimited strings, each representing a leaf path
+        in the logical object hierarchy (e.g. ``"item.context.company.policy.security.passwords.rotation_days"``).
+        Empty path segments are ignored.
+    :type paths: List[str]
+    :param force_leaf_type: The JSON Schema ``type`` value to assign to every leaf node
+        produced from the supplied paths. Defaults to ``"string"``.
+    :type force_leaf_type: str
+    :return: A JSON Schema fragment describing the hierarchical structure implied by
+        the input paths. The returned schema root always has ``type: object`` with
+        recursively nested ``properties`` / ``required`` keys.
+    :rtype: Dict[str, Any]
+    """
+    # Build tree where each node: {"__children__": { segment: node, ... }, "__leaf__": bool }
+    root: Dict[str, Any] = {"__children__": {}, "__leaf__": False}
+
+    def insert(path: str):
+        parts = [p for p in path.split(".") if p]
+        node = root
+        for i, part in enumerate(parts):
+            children = node["__children__"]
+            if part not in children:
+                children[part] = {"__children__": {}, "__leaf__": False}
+            node = children[part]
+            if i == len(parts) - 1:
+                node["__leaf__"] = True
+
+    for p in paths:
+        insert(p)
+
+    def to_schema(node: Dict[str, Any]) -> Dict[str, Any]:
+        children = node["__children__"]
+        if not children:
+            # Leaf node
+            return {"type": force_leaf_type}
+        props = {}
+        required = []
+        for name, child in children.items():
+            props[name] = to_schema(child)
+            required.append(name)
+        return {
+            "type": "object",
+            "properties": props,
+            "required": required,
+        }
+
+    return to_schema(root)
+
+
 def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]) -> Dict[str, Any]:
-    """
-
-
+    """
+    Produce a data source config (JSON schema) that reflects nested object structure
+    when column mappings reference dotted paths (e.g., item.context.company...).
+
+    Backward compatibility:
+    - If all referenced source paths are single tokens (flat), fall back to legacy flat schema.
+    - Otherwise build a nested object schema covering only referenced leaves.
 
-    :param input_data_df: The input data to be evaluated, as produced by the `_validate_and_load_data`
-        helper function.
     :type input_data_df: pd.DataFrame
-    :param
+    :param input_data_df: The input data to be evaluated, as produced by the `_validate_and_load_data`
     :type column_mapping: Optional[Dict[str, str]]
+    :param column_mapping: The column mapping to use for the evaluation. If None, the default mapping will be used.
     :return: A dictionary that can act as data source config for OAI evaluation group creation.
     :rtype: Dict[str, Any]
+        helper function.
     """
+    # Extract referenced data paths from mapping values of the form ${data.<path>} (ignore ${run.outputs.*})
+    referenced_paths: List[str] = []
+    for v in column_mapping.values():
+        m = DATA_PATH_PATTERN.match(v)
+        if m:
+            referenced_paths.append(m.group(1))
+
+    # Decide if we have nested structures
+    has_nested = any("." in p for p in referenced_paths)
+
+    if not referenced_paths or not has_nested:
+        # Legacy flat behavior (existing logic): treat each mapping key as independent string field
+        data_source_config = {
+            "type": "custom",
+            "item_schema": {
+                "type": "object",
+                "properties": {},
+                "required": [],
+            },
+        }
+        props = data_source_config["item_schema"]["properties"]
+        req = data_source_config["item_schema"]["required"]
+        for key in column_mapping.keys():
+            props[key] = {"type": "string"}
+            req.append(key)
+        return data_source_config
+
+    # NEW: If all nested paths share the same first segment (e.g. 'item'),
+    # treat that segment as the wrapper already provided by the JSONL line ("item": {...})
+    # so we exclude it from the schema (schema describes the *inside* of "item").
+    first_segments = {p.split(".")[0] for p in referenced_paths}
+    strip_wrapper = False
+    wrapper_name = None
+    if len(first_segments) == 1:
+        only_seg = next(iter(first_segments))
+        # We only strip if that segment looks like the canonical wrapper.
+        if only_seg == WRAPPER_KEY:
+            strip_wrapper = True
+            wrapper_name = only_seg
+
+    effective_paths = referenced_paths
+    if strip_wrapper:
+        stripped = []
+        for p in referenced_paths:
+            parts = p.split(".", 1)
+            if len(parts) == 2:
+                stripped.append(parts[1])  # drop leading 'item.'
+            else:
+                # Path was just 'item' (no leaf) – ignore; it doesn't define a leaf value.
+                continue
+        # If stripping produced at least one usable path, adopt; else fall back to original.
+        if stripped:
+            effective_paths = stripped
 
-
+    nested_schema = _build_schema_tree_from_paths(effective_paths, force_leaf_type="string")
+
+    return {
         "type": "custom",
-        "item_schema":
-            "type": "object",
-            "properties": {},
-            "required": [],
-        },
+        "item_schema": nested_schema,
     }
-    properties = data_source_config["item_schema"]["properties"]
-    required = data_source_config["item_schema"]["required"]
-    for key in column_mapping.keys():
-        properties[key] = {
-            "type": "string",
-        }
-        required.append(key)
-    return data_source_config
 
 
 def _generate_default_data_source_config(input_data_df: pd.DataFrame) -> Dict[str, Any]:
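Note: a worked example of the two helpers above, assuming both are imported from this module. Two dotted paths that share a prefix collapse into a single nested object schema, with every segment marked required:

    paths = ["query", "context.company.name"]
    schema = _build_schema_tree_from_paths(paths)
    # schema ==
    # {
    #     "type": "object",
    #     "properties": {
    #         "query": {"type": "string"},
    #         "context": {
    #             "type": "object",
    #             "properties": {
    #                 "company": {
    #                     "type": "object",
    #                     "properties": {"name": {"type": "string"}},
    #                     "required": ["name"],
    #                 }
    #             },
    #             "required": ["company"],
    #         },
    #     },
    #     "required": ["query", "context"],
    # }

With a mapping such as {"query": "${data.item.query}", "company_name": "${data.item.context.company.name}"}, _generate_data_source_config strips the shared `item` wrapper and returns {"type": "custom", "item_schema": <the schema above>}.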
@@ -518,9 +686,9 @@ def _generate_default_data_source_config(input_data_df: pd.DataFrame) -> Dict[st
 
 def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]) -> Dict[str, Any]:
     """
-    Given a dataframe of data to be evaluated, and
-    produce a dictionary can be used as the data source input for an OAI evaluation run.
-
+    Given a dataframe of data to be evaluated, and a column mapping,
+    produce a dictionary that can be used as the data source input for an OAI evaluation run.
+    Builds a nested 'item' object mirroring the hierarchical paths in the mapping values.
     :param input_data_df: The input data to be evaluated, as produced by the `_validate_and_load_data`
         helper function.
     :type input_data_df: pd.DataFrame
@@ -529,25 +697,86 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
     :return: A dictionary that can be used as the data source input for an OAI evaluation run.
     :rtype: Dict[str, Any]
     """
-
-
-
-
+    # Gather path specs: list of tuples (original_mapping_value, relative_parts, dataframe_column_name)
+    # relative_parts excludes the wrapper (so schema + content align).
+    path_specs: List[Tuple[str, List[str], str]] = []
+
     for name, formatted_entry in column_mapping.items():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if not (
+            isinstance(formatted_entry, str) and formatted_entry.startswith("${") and formatted_entry.endswith("}")
+        ):
+            continue
+        body = formatted_entry[2:-1]  # remove ${ }
+        pieces = body.split(".")
+
+        if not pieces:
+            continue
+
+        if pieces[0] == "data":
+            # Data path: data.<maybe wrapper>.<...>
+            if len(pieces) == 1:
+                continue
+            source_path = ".".join(pieces[1:])  # e.g. item.context.company...
+            # Skip mapping of wrapper itself
+            if source_path == WRAPPER_KEY:
+                continue
+
+            # Determine dataframe column name (it is the full dotted path as flattened earlier)
+            dataframe_col = source_path
+
+            # Relative parts for nested insertion (drop leading wrapper if present)
+            if source_path.startswith(WRAPPER_KEY + "."):
+                relative_path = source_path[len(WRAPPER_KEY) + 1 :]
+            else:
+                # Path not under wrapper; treat its segments as is (will live directly under wrapper)
+                relative_path = source_path
+
+            relative_parts = [p for p in relative_path.split(".") if p]
+
+            # Defensive: if mapping alias differs from leaf, prefer actual path leaf to stay consistent.
+            # (If you want alias override, replace relative_parts[-1] with name when name != path_leaf.)
+            if not relative_parts:
+                continue
+
+            path_specs.append((formatted_entry, relative_parts, dataframe_col))
+
+        elif pieces[0] == "run" and len(pieces) >= 3 and pieces[1] == "outputs":
+            # Target / run outputs become __outputs.<rest> columns
+            run_col = "__outputs." + ".".join(pieces[2:])
+            leaf_name = pieces[-1]
+            path_specs.append((formatted_entry, [leaf_name], run_col))
+
+    content: List[Dict[str, Any]] = []
+
+    for _, row in input_data_df.iterrows():
+        item_root: Dict[str, Any] = {}
+
+        for _, rel_parts, df_col in path_specs:
+            # Safely fetch value
+            val = row.get(df_col, None)
+
+            # Convert value to string to match schema's "type": "string" leaves.
+            # (If you later infer types, you can remove the stringify.)
+            if val is None:
+                str_val = ""
+            elif isinstance(val, (str, int, float, bool)):
+                str_val = str(val)
+            else:
+                # Lists / dicts / other -> string for now
+                str_val = str(val)
+
+            # Insert into nested dict
+            cursor = item_root
+            for seg in rel_parts[:-1]:
+                nxt = cursor.get(seg)
+                if not isinstance(nxt, dict):
+                    nxt = {}
+                    cursor[seg] = nxt
+                cursor = nxt
+            leaf_key = rel_parts[-1]
+            cursor[leaf_key] = str_val
+
+        content.append({WRAPPER_KEY: item_root})
 
     return {
         "type": "jsonl",
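Note: tracing the content builder above with one row makes the wrapper handling concrete. Assuming the dataframe was flattened so that nested JSONL fields became dotted column names (hypothetical data below):

    import pandas as pd

    df = pd.DataFrame([{"item.query": "What is the VPN policy?", "item.context.company.name": "Contoso"}])
    column_mapping = {
        "query": "${data.item.query}",
        "company_name": "${data.item.context.company.name}",
    }
    # For this input, the `content` list built above would be:
    # [{"item": {"query": "What is the VPN policy?",
    #            "context": {"company": {"name": "Contoso"}}}}]

Each mapping value loses its `data.` prefix, the leading `item` wrapper is dropped from the relative parts, and the row's values are re-nested under a single "item" key so the rows line up with the item_schema generated earlier.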
@@ -586,7 +815,7 @@ def _begin_eval_run(
     data_source = _get_data_source(input_data_df, column_mapping)
     eval_run = client.evals.runs.create(
         eval_id=eval_group_id,
-        data_source=data_source,
+        data_source=cast(Any, data_source),  # Cast for type checker: dynamic schema dict accepted by SDK at runtime
         name=run_name,
         metadata={"sample_generation": "off", "file_format": "jsonl", "is_foundry_eval": "true"},
         # TODO decide if we want to add our own timeout value?
azure/ai/evaluation/_evaluate/_utils.py

@@ -330,7 +330,11 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
         json.dump(data_dict, f, ensure_ascii=False)
 
     # Use tqdm.write to print message without interfering with any current progress bar
-    tqdm.write(
+    # Fall back to regular print if tqdm.write fails (e.g., when progress bar is closed)
+    try:
+        tqdm.write(f'Evaluation results saved to "{p.resolve()}".\n')
+    except Exception:
+        print(f'Evaluation results saved to "{p.resolve()}".\n')
 
 
 def _apply_column_mapping(
@@ -460,7 +464,7 @@ class JSONLDataFileLoader:
         self.filename = filename
 
     def load(self) -> pd.DataFrame:
-        return pd.read_json(self.filename, lines=True)
+        return pd.read_json(self.filename, lines=True, dtype=object)
 
 
 class CSVDataFileLoader:
@@ -468,7 +472,7 @@ class CSVDataFileLoader:
         self.filename = filename
 
     def load(self) -> pd.DataFrame:
-        return pd.read_csv(self.filename)
+        return pd.read_csv(self.filename, dtype=str)
 
 
 class DataLoaderFactory:
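Note: the new dtype arguments stop pandas from type-inferring input values before evaluation (the JSONL loader keeps values as objects; the CSV loader reads every cell as a string). Without them, identifier-like values are silently coerced, as this standalone snippet shows:

    import io
    import pandas as pd

    csv_data = "id,score\n007,3\n"
    print(pd.read_csv(io.StringIO(csv_data)).loc[0, "id"])             # 7 (inferred as int)
    print(pd.read_csv(io.StringIO(csv_data), dtype=str).loc[0, "id"])  # '007' (preserved)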
azure/ai/evaluation/_evaluators/_coherence/_coherence.py

@@ -23,6 +23,11 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         ~azure.ai.evaluation.OpenAIModelConfiguration]
     :param threshold: The threshold for the coherence evaluator. Default is 3.
     :type threshold: int
+    :param credential: The credential for authenticating to Azure AI service.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
+        This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
+    :paramtype is_reasoning_model: bool
 
     .. admonition:: Example:
 
@@ -66,7 +71,7 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
 
     @override
-    def __init__(self, model_config, *, threshold=3, credential=None):
+    def __init__(self, model_config, *, threshold=3, credential=None, **kwargs):
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
         self._threshold = threshold
@@ -78,6 +83,7 @@ class CoherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
             threshold=threshold,
             credential=credential,
             _higher_is_better=self._higher_is_better,
+            **kwargs,
         )
 
     @overload
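Note: with **kwargs now forwarded to the base class, the documented `is_reasoning_model` keyword can be passed at construction time. A usage sketch (endpoint/deployment values are placeholders):

    from azure.ai.evaluation import CoherenceEvaluator

    model_config = {
        "azure_endpoint": "https://<your-resource>.openai.azure.com",  # placeholder
        "azure_deployment": "<o-series-deployment>",                   # placeholder
        "api_key": "<api-key>",                                        # placeholder
    }

    # is_reasoning_model travels through **kwargs to the prompty flow,
    # adjusting parameters (e.g., max_completion_tokens) for o1/o3-style models.
    evaluator = CoherenceEvaluator(model_config=model_config, is_reasoning_model=True)
    result = evaluator(query="What is the capital of France?", response="Paris is the capital of France.")

The same pattern applies to the FluencyEvaluator and GroundednessEvaluator changes below.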
azure/ai/evaluation/_evaluators/_common/_base_eval.py

@@ -4,12 +4,14 @@
 
 import inspect
 from abc import ABC, abstractmethod
+import json
 from typing import (
     Any,
     Callable,
     Dict,
     Generic,
     List,
+    Tuple,
     TypedDict,
     TypeVar,
     Union,
@@ -510,6 +512,67 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
 
         return tool_calls
 
+    def _extract_tool_names_and_params_from_response(self, response) -> List[Tuple[str, Dict[str, str]]]:
+        """Extract tool names and parameters from the response.
+
+        :param response: The response to parse.
+        :type response: Union[str, List[dict]]
+        :return: List of tuples containing (tool_name, parameters_dict) extracted from the response.
+        :rtype: List[Tuple[str, Dict[str, str]]]
+        """
+        tool_calls = self._parse_tools_from_response(response)
+        tool_name_param_pairs = []
+        for tool_call in tool_calls:
+            if not isinstance(tool_call, dict):
+                raise EvaluationException(
+                    "Tool call must be a dictionary.",
+                    internal_message=str(tool_call),
+                    target=ErrorTarget.EVALUATE,
+                    category=ErrorCategory.UNKNOWN,
+                )
+            if tool_call.get("type") != "tool_call":
+                raise EvaluationException(
+                    "Tool call must have 'type' set to 'tool_call'.",
+                    internal_message=str(tool_call),
+                    target=ErrorTarget.EVALUATE,
+                    category=ErrorCategory.INVALID_VALUE,
+                )
+
+            if "name" not in tool_call:
+                raise EvaluationException(
+                    "Tool call missing 'name' field.",
+                    internal_message=str(tool_call),
+                    target=ErrorTarget.EVALUATE,
+                    category=ErrorCategory.MISSING_FIELD,
+                )
+
+            tool_name = str(tool_call["name"]).strip()
+
+            # Extract parameters/arguments
+            parameters = {}
+            if "arguments" in tool_call:
+                args = tool_call["arguments"]
+                if isinstance(args, dict):
+                    # Convert all values to strings for consistent comparison
+                    parameters = {str(k): str(v) for k, v in args.items()}
+                elif isinstance(args, str):
+                    # If arguments is a string, try to parse it as JSON
+                    try:
+                        parsed_args = json.loads(args)
+                        if isinstance(parsed_args, dict):
+                            parameters = {str(k): str(v) for k, v in parsed_args.items()}
+                    except json.JSONDecodeError:
+                        raise EvaluationException(
+                            "Failed to parse tool call arguments as JSON.",
+                            internal_message=str(tool_call),
+                            target=ErrorTarget.EVALUATE,
+                            category=ErrorCategory.INVALID_VALUE,
+                        )
+
+            tool_name_param_pairs.append((tool_name, parameters))
+
+        return tool_name_param_pairs
+
     async def _real_call(self, **kwargs) -> Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]:
         """The asynchronous call where real end-to-end evaluation logic is performed.
 
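Note: per the validation above, every tool call must be a dict with type == "tool_call" and a "name"; arguments may arrive as a dict or as a JSON string. A standalone sketch of just the arguments normalization (the EvaluationException plumbing is omitted here):

    import json

    def extract_params(tool_call: dict) -> dict:
        args = tool_call.get("arguments", {})
        if isinstance(args, str):  # JSON-encoded arguments
            args = json.loads(args)
        if not isinstance(args, dict):
            return {}
        return {str(k): str(v) for k, v in args.items()}

    call = {"type": "tool_call", "name": "get_weather", "arguments": '{"city": "Paris", "days": 3}'}
    print((call["name"], extract_params(call)))
    # -> ('get_weather', {'city': 'Paris', 'days': '3'})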
@@ -532,14 +595,25 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
                 base_key = key[:-6]  # Remove "_score" suffix
                 result_key = f"{base_key}_result"
                 threshold_key = f"{base_key}_threshold"
-
+                threshold_value = (
+                    self._threshold.get(base_key) if isinstance(self._threshold, dict) else self._threshold
+                )
+                if not isinstance(threshold_value, (int, float)):
+                    raise EvaluationException(
+                        "Threshold value must be a number.",
+                        internal_message=str(threshold_value),
+                        target=ErrorTarget.EVALUATE,
+                        category=ErrorCategory.INVALID_VALUE,
+                    )
+
+                result[threshold_key] = threshold_value
                 if self._higher_is_better:
-                    if float(score_value) >=
+                    if float(score_value) >= threshold_value:
                         result[result_key] = EVALUATION_PASS_FAIL_MAPPING[True]
                     else:
                         result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
                 else:
-                    if float(score_value) <=
+                    if float(score_value) <= threshold_value:
                         result[result_key] = EVALUATION_PASS_FAIL_MAPPING[True]
                     else:
                         result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
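Note: the threshold lookup now accepts either a single number applied to every metric or a dict keyed by the score's base name (the key with its `_score` suffix removed). A minimal sketch of the resolution rule:

    def resolve_threshold(threshold, base_key):
        # Dict thresholds are per-metric; scalar thresholds apply to every metric.
        value = threshold.get(base_key) if isinstance(threshold, dict) else threshold
        if not isinstance(value, (int, float)):
            raise ValueError(f"Threshold value must be a number, got {value!r}")
        return value

    print(resolve_threshold(3, "coherence"))                             # 3
    print(resolve_threshold({"coherence": 4, "fluency": 2}, "fluency"))  # 2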
azure/ai/evaluation/_evaluators/_fluency/_fluency.py

@@ -25,6 +25,11 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         ~azure.ai.evaluation.OpenAIModelConfiguration]
     :param threshold: The threshold for the fluency evaluator. Default is 3.
     :type threshold: int
+    :param credential: The credential for authenticating to Azure AI service.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
+        This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
+    :paramtype is_reasoning_model: bool
 
     .. admonition:: Example:
 
@@ -68,7 +73,7 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
 
     @override
-    def __init__(self, model_config, *, credential=None, threshold=3):
+    def __init__(self, model_config, *, credential=None, threshold=3, **kwargs):
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
         self._threshold = threshold
@@ -80,6 +85,7 @@ class FluencyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
             threshold=threshold,
             credential=credential,
             _higher_is_better=self._higher_is_better,
+            **kwargs,
         )
 
     @overload
azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py

@@ -49,6 +49,11 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
         ~azure.ai.evaluation.OpenAIModelConfiguration]
     :param threshold: The threshold for the groundedness evaluator. Default is 3.
     :type threshold: int
+    :param credential: The credential for authenticating to Azure AI service.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :keyword is_reasoning_model: If True, the evaluator will use reasoning model configuration (o1/o3 models).
+        This will adjust parameters like max_completion_tokens and remove unsupported parameters. Default is False.
+    :paramtype is_reasoning_model: bool
 
     .. admonition:: Example:
 
@@ -106,6 +111,7 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
             threshold=threshold,
             credential=credential,
             _higher_is_better=self._higher_is_better,
+            **kwargs,
         )
         self._model_config = model_config
         self.threshold = threshold
azure/ai/evaluation/_evaluators/_path_efficiency/__init__.py (new file)

@@ -0,0 +1,7 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from ._path_efficiency import PathEfficiencyEvaluator
+
+__all__ = ["PathEfficiencyEvaluator"]