azure-ai-evaluation 1.12.0__py3-none-any.whl → 1.13.0__py3-none-any.whl
This diff covers publicly available package versions that have been released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- azure/ai/evaluation/__init__.py +2 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +6 -9
- azure/ai/evaluation/_aoai/label_grader.py +6 -10
- azure/ai/evaluation/_aoai/python_grader.py +7 -10
- azure/ai/evaluation/_aoai/score_model_grader.py +5 -7
- azure/ai/evaluation/_aoai/string_check_grader.py +4 -9
- azure/ai/evaluation/_aoai/text_similarity_grader.py +7 -21
- azure/ai/evaluation/_common/__init__.py +2 -1
- azure/ai/evaluation/_common/constants.py +109 -0
- azure/ai/evaluation/_common/evaluation_onedp_client.py +5 -5
- azure/ai/evaluation/_common/onedp/__init__.py +2 -2
- azure/ai/evaluation/_common/onedp/_client.py +44 -14
- azure/ai/evaluation/_common/onedp/_configuration.py +9 -7
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1 -1
- azure/ai/evaluation/_common/onedp/_validation.py +18 -2
- azure/ai/evaluation/_common/onedp/aio/__init__.py +2 -2
- azure/ai/evaluation/_common/onedp/aio/_client.py +44 -14
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +9 -7
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +12 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +3942 -1631
- azure/ai/evaluation/_common/onedp/models/__init__.py +196 -6
- azure/ai/evaluation/_common/onedp/models/_enums.py +217 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +3876 -603
- azure/ai/evaluation/_common/onedp/operations/__init__.py +12 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +5422 -2577
- azure/ai/evaluation/_common/rai_service.py +299 -2
- azure/ai/evaluation/_common/utils.py +241 -39
- azure/ai/evaluation/_constants.py +100 -0
- azure/ai/evaluation/_eval_mapping.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +10 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +1019 -5
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +86 -11
- azure/ai/evaluation/_evaluate/_utils.py +10 -3
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -17
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +16 -4
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +181 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +107 -45
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +23 -4
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +14 -6
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +36 -19
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +43 -20
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +32 -6
- azure/ai/evaluation/_evaluators/{_path_efficiency → _task_completion}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_task_success/_task_success.py → _task_completion/_task_completion.py} +39 -30
- azure/ai/evaluation/_evaluators/{_task_success/task_success.prompty → _task_completion/task_completion.prompty} +2 -2
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/{_path_efficiency/_path_efficiency.py → _task_navigation_efficiency/_task_navigation_efficiency.py} +115 -73
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +23 -127
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/{_task_success → _tool_success}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -19
- azure/ai/evaluation/_exceptions.py +6 -1
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +14 -1
- azure/ai/evaluation/_legacy/prompty/_prompty.py +2 -1
- azure/ai/evaluation/_legacy/prompty/_utils.py +54 -6
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_attack_objective_generator.py +3 -1
- azure/ai/evaluation/red_team/_attack_strategy.py +1 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +45 -14
- azure/ai/evaluation/red_team/_evaluation_processor.py +129 -12
- azure/ai/evaluation/red_team/_mlflow_integration.py +41 -352
- azure/ai/evaluation/red_team/_orchestrator_manager.py +309 -51
- azure/ai/evaluation/red_team/_red_team.py +494 -37
- azure/ai/evaluation/red_team/_red_team_result.py +48 -28
- azure/ai/evaluation/red_team/_result_processor.py +558 -29
- azure/ai/evaluation/red_team/_utils/constants.py +1 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +125 -24
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +10 -7
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +3 -25
- azure/ai/evaluation/simulator/_adversarial_simulator.py +1 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
- azure/ai/evaluation/simulator/_conversation/_conversation.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +25 -2
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -0
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- {azure_ai_evaluation-1.12.0.dist-info → azure_ai_evaluation-1.13.0.dist-info}/METADATA +38 -8
- {azure_ai_evaluation-1.12.0.dist-info → azure_ai_evaluation-1.13.0.dist-info}/RECORD +99 -86
- {azure_ai_evaluation-1.12.0.dist-info → azure_ai_evaluation-1.13.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.12.0.dist-info → azure_ai_evaluation-1.13.0.dist-info}/licenses/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.12.0.dist-info → azure_ai_evaluation-1.13.0.dist-info}/top_level.txt +0 -0

azure/ai/evaluation/_evaluate/_evaluate_aoai.py

@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

+import json
 import logging
 import re

@@ -60,6 +61,7 @@ def _split_evaluators_and_grader_configs(
     :return: Tuple of two dictionaries, the first containing evaluators and the second containing AOAI graders.
     :rtype: Tuple[Dict[str, Callable], Dict[str, AoaiGrader]]
     """
+    LOGGER.info(f"AOAI: Splitting {len(evaluators)} evaluators into AOAI graders and standard evaluators...")
     true_evaluators = {}
     aoai_graders = {}
     for key, value in evaluators.items():
@@ -67,6 +69,7 @@ def _split_evaluators_and_grader_configs(
             aoai_graders[key] = value
         else:
             true_evaluators[key] = value
+    LOGGER.info(f"AOAI: Found {len(aoai_graders)} AOAI graders and {len(true_evaluators)} standard evaluators.")
     return true_evaluators, aoai_graders


@@ -103,11 +106,18 @@ def _begin_aoai_evaluation(
     LOGGER.info("AOAI: Aoai graders detected among evaluator inputs. Preparing to create OAI eval group...")
     all_eval_run_info: List[OAIEvalRunCreationInfo] = []

-
+    grader_mapping_list = list(_get_graders_and_column_mappings(graders, column_mappings))
+    LOGGER.info(f"AOAI: Will create {len(grader_mapping_list)} separate evaluation run(s) based on column mappings.")
+
+    for idx, (selected_graders, selected_column_mapping) in enumerate(grader_mapping_list):
+        LOGGER.info(
+            f"AOAI: Starting evaluation run {idx + 1}/{len(grader_mapping_list)} with {len(selected_graders)} grader(s)..."
+        )
         all_eval_run_info.append(
             _begin_single_aoai_evaluation(selected_graders, data, selected_column_mapping, run_name)
         )

+    LOGGER.info(f"AOAI: Successfully created {len(all_eval_run_info)} evaluation run(s).")
     return all_eval_run_info


@@ -133,6 +143,7 @@ def _begin_single_aoai_evaluation(
     """

     # Format data for eval group creation
+    LOGGER.info(f"AOAI: Preparing evaluation for {len(graders)} grader(s): {list(graders.keys())}")
     grader_name_list = []
     grader_list = []
     # It's expected that all graders supplied for a single eval run use the same credentials
@@ -143,10 +154,12 @@ def _begin_single_aoai_evaluation(
         grader_name_list.append(name)
         grader_list.append(grader._grader_config)
     effective_column_mapping: Dict[str, str] = column_mapping or {}
+    LOGGER.info(f"AOAI: Generating data source config with {len(effective_column_mapping)} column mapping(s)...")
     data_source_config = _generate_data_source_config(data, effective_column_mapping)
+    LOGGER.info(f"AOAI: Data source config generated with schema type: {data_source_config.get('type')}")

     # Create eval group
-
+    LOGGER.info(f"AOAI: Creating eval group with {len(grader_list)} testing criteria...")
     eval_group_info = client.evals.create(
         data_source_config=data_source_config, testing_criteria=grader_list, metadata={"is_foundry_eval": "true"}
     )
@@ -167,6 +180,7 @@ def _begin_single_aoai_evaluation(
         grader_name_map[criteria.id] = name

     # Create eval run
+    LOGGER.info(f"AOAI: Creating eval run '{run_name}' with {len(data)} data rows...")
    eval_run_id = _begin_eval_run(client, eval_group_info.id, run_name, data, effective_column_mapping)
    LOGGER.info(
        f"AOAI: Eval run created with id {eval_run_id}."
@@ -197,13 +211,16 @@ def _get_evaluation_run_results(all_run_info: List[OAIEvalRunCreationInfo]) -> T
     :raises EvaluationException: If the evaluation run fails or is not completed before timing out.
     """

+    LOGGER.info(f"AOAI: Retrieving results from {len(all_run_info)} evaluation run(s)...")
     run_metrics = {}
     output_df = pd.DataFrame()
-    for run_info in all_run_info:
+    for idx, run_info in enumerate(all_run_info):
+        LOGGER.info(f"AOAI: Fetching results for run {idx + 1}/{len(all_run_info)} (ID: {run_info['eval_run_id']})...")
         cur_output_df, cur_run_metrics = _get_single_run_results(run_info)
         output_df = pd.concat([output_df, cur_output_df], axis=1)
         run_metrics.update(cur_run_metrics)

+    LOGGER.info(f"AOAI: Successfully retrieved all results. Combined dataframe shape: {output_df.shape}")
     return output_df, run_metrics


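Note: the hunk above merges each run's dataframe column-wise and folds the per-run metric dictionaries together. A self-contained sketch of that merge pattern with toy data (the column names are invented for illustration):

    import pandas as pd

    # Toy per-run outputs standing in for (cur_output_df, cur_run_metrics) pairs.
    run_outputs = [
        (pd.DataFrame({"grader_a.passed": [True, False]}), {"grader_a.pass_rate": 0.5}),
        (pd.DataFrame({"grader_b.passed": [True, True]}), {"grader_b.pass_rate": 1.0}),
    ]

    combined_df = pd.DataFrame()
    combined_metrics = {}
    for cur_df, cur_metrics in run_outputs:
        combined_df = pd.concat([combined_df, cur_df], axis=1)  # column-wise merge
        combined_metrics.update(cur_metrics)

    print(combined_df.shape)   # (2, 2)
    print(combined_metrics)    # {'grader_a.pass_rate': 0.5, 'grader_b.pass_rate': 1.0}

Because each per-run dataframe is reset to a RangeIndex later in this file, the column-wise concat aligns rows by position.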
@@ -223,8 +240,10 @@ def _get_single_run_results(
     :raises EvaluationException: If the evaluation run fails or is not completed before timing out.
     """
     # Wait for evaluation run to complete
+    LOGGER.info(f"AOAI: Waiting for eval run {run_info['eval_run_id']} to complete...")
     run_results = _wait_for_run_conclusion(run_info["client"], run_info["eval_group_id"], run_info["eval_run_id"])

+    LOGGER.info(f"AOAI: Eval run {run_info['eval_run_id']} completed with status: {run_results.status}")
     if run_results.status != "completed":
         raise EvaluationException(
             message=f"AOAI evaluation run {run_info['eval_group_id']}/{run_info['eval_run_id']}"
@@ -235,6 +254,7 @@ def _get_single_run_results(
         )

     # Convert run results into a dictionary of metrics
+    LOGGER.info(f"AOAI: Processing results and calculating metrics for run {run_info['eval_run_id']}...")
     run_metrics: Dict[str, Any] = {}
     if run_results.per_testing_criteria_results is None:
         msg = (
@@ -255,8 +275,10 @@ def _get_single_run_results(
         ratio = passed / (passed + failed) if (passed + failed) else 0.0
         formatted_column_name = f"{grader_name}.pass_rate"
         run_metrics[formatted_column_name] = ratio
+        LOGGER.info(f"AOAI: Grader '{grader_name}': {passed} passed, {failed} failed, pass_rate={ratio:.4f}")

     # Collect all results with pagination
+    LOGGER.info(f"AOAI: Collecting output items for run {run_info['eval_run_id']} with pagination...")
     all_results: List[Any] = []
     next_cursor: Optional[str] = None
     limit = 100  # Max allowed by API
@@ -280,6 +302,7 @@ def _get_single_run_results(
         else:
             break

+    LOGGER.info(f"AOAI: Collected {len(all_results)} total output items across all pages.")
     listed_results: Dict[str, List[Any]] = {"index": []}
     # Raw data has no order guarantees; capture datasource_item_id per row for ordering.
     for row_result in all_results:
@@ -329,6 +352,7 @@ def _get_single_run_results(

     # Ensure all columns are the same length as the 'index' list
     num_rows = len(listed_results["index"])
+    LOGGER.info(f"AOAI: Processing {num_rows} result rows into dataframe...")
     for col_name in list(listed_results.keys()):
         if col_name != "index":
             col_length = len(listed_results[col_name])
@@ -356,6 +380,7 @@ def _get_single_run_results(
     expected = run_info.get("expected_rows", None)
     if expected is not None:
         pre_len = len(output_df)
+        LOGGER.info(f"AOAI: Validating result count: expected {expected} rows, received {pre_len} rows.")
         # Assumes original datasource_item_id space is 0..expected-1
         output_df = output_df.reindex(range(expected))
         if pre_len != expected:
@@ -388,6 +413,9 @@ def _get_single_run_results(

     # Reset to RangeIndex so downstream concatenation aligns on position
     output_df.reset_index(drop=True, inplace=True)
+    LOGGER.info(
+        f"AOAI: Successfully processed run {run_info['eval_run_id']} with final dataframe shape: {output_df.shape}"
+    )
     return output_df, run_metrics


@@ -481,11 +509,16 @@ def _get_graders_and_column_mappings(
     :rtype: List[Tuple[Dict[str, AoaiGrader], Optional[Dict[str, str]]]]
     """

+    LOGGER.info(f"AOAI: Organizing {len(graders)} graders with column mappings...")
     if column_mappings is None:
+        LOGGER.info("AOAI: No column mappings provided, each grader will have its own eval run.")
        return [({name: grader}, None) for name, grader in graders.items()]
    default_mapping = column_mappings.get("default", None)
    if default_mapping is None:
        default_mapping = {}
+    LOGGER.info(
+        f"AOAI: Using default mapping with {len(default_mapping)} entries for graders without specific mappings."
+    )
    return [
        ({name: grader}, None if column_mappings is None else column_mappings.get(name, default_mapping))
        for name, grader in graders.items()
@@ -593,17 +626,23 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
     helper function.
     """
     # Extract referenced data paths from mapping values of the form ${data.<path>} (ignore ${run.outputs.*})
+    LOGGER.info(
+        f"AOAI: Generating data source config for {len(input_data_df)} rows with {len(column_mapping)} column mapping(s)..."
+    )
     referenced_paths: List[str] = []
     for v in column_mapping.values():
         m = DATA_PATH_PATTERN.match(v)
         if m:
             referenced_paths.append(m.group(1))

+    LOGGER.info(f"AOAI: Found {len(referenced_paths)} referenced paths in column mappings: {referenced_paths}")
     # Decide if we have nested structures
     has_nested = any("." in p for p in referenced_paths)
+    LOGGER.info(f"AOAI: Schema generation mode: {'nested' if has_nested else 'flat'}")

     if not referenced_paths or not has_nested:
         # Legacy flat behavior (existing logic): treat each mapping key as independent string field
+        LOGGER.info("AOAI: Using flat schema generation (no nested structures detected).")
         data_source_config = {
             "type": "custom",
             "item_schema": {
@@ -617,6 +656,7 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
         for key in column_mapping.keys():
             props[key] = {"type": "string"}
             req.append(key)
+        LOGGER.info(f"AOAI: Flat schema generated with {len(props)} properties: {list(props.keys())}")
         return data_source_config

     # NEW: If all nested paths share the same first segment (e.g. 'item'),
@@ -625,12 +665,14 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
     first_segments = {p.split(".")[0] for p in referenced_paths}
     strip_wrapper = False
     wrapper_name = None
+    LOGGER.info(f"AOAI: First segments in referenced paths: {first_segments}")
     if len(first_segments) == 1:
         only_seg = next(iter(first_segments))
         # We only strip if that segment looks like the canonical wrapper.
         if only_seg == WRAPPER_KEY:
             strip_wrapper = True
             wrapper_name = only_seg
+            LOGGER.info(f"AOAI: All paths start with wrapper '{WRAPPER_KEY}', will strip from schema.")

     effective_paths = referenced_paths
     if strip_wrapper:
@@ -645,9 +687,12 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
         # If stripping produced at least one usable path, adopt; else fall back to original.
         if stripped:
             effective_paths = stripped
+            LOGGER.info(f"AOAI: Effective paths after stripping wrapper: {effective_paths}")

+    LOGGER.info(f"AOAI: Building nested schema from {len(effective_paths)} effective paths...")
     nested_schema = _build_schema_tree_from_paths(effective_paths, force_leaf_type="string")

+    LOGGER.info(f"AOAI: Nested schema generated successfully with type '{nested_schema.get('type')}'")
     return {
         "type": "custom",
         "item_schema": nested_schema,
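Note: to make the two branches concrete, a mapping whose `${data.<path>}` references contain no dots (for example `{"query": "${data.query}", "response": "${run.outputs.response}"}`, where the `${run.outputs.*}` entry is ignored) takes the legacy flat branch, while dotted references such as `${data.item.query}` trigger the nested schema built by `_build_schema_tree_from_paths`. A rough sketch of the flat construction; the `item_schema` keys beyond what this hunk shows are assumptions, not copied from the package:

    column_mapping = {"query": "${data.query}", "response": "${run.outputs.response}"}

    # Flat branch: every mapping key becomes an independent string property.
    props = {key: {"type": "string"} for key in column_mapping}
    data_source_config = {
        "type": "custom",
        "item_schema": {
            "type": "object",  # assumed boilerplate around the visible props/req fields
            "properties": props,
            "required": list(column_mapping),
        },
    }

The nested branch instead strips the canonical `item` wrapper (the `WRAPPER_KEY` referenced above) and returns the tree it builds under `item_schema`, as the last hunk shows.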
@@ -697,6 +742,23 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
     :return: A dictionary that can be used as the data source input for an OAI evaluation run.
     :rtype: Dict[str, Any]
     """
+
+    def _convert_value_to_string(val: Any) -> str:
+        """Convert a value to string representation for AOAI evaluation."""
+        if val is None:
+            return ""
+        elif isinstance(val, (str, int, float, bool)):
+            return str(val)
+        else:
+            try:  # Attempt to JSON serialize lists/dicts
+                return json.dumps(val, ensure_ascii=False)
+            except (TypeError, ValueError):
+                # Fallback for unserializable objects
+                return str(val)
+
+    LOGGER.info(
+        f"AOAI: Building data source from {len(input_data_df)} rows with {len(column_mapping)} column mappings..."
+    )
     # Gather path specs: list of tuples (original_mapping_value, relative_parts, dataframe_column_name)
     # relative_parts excludes the wrapper (so schema + content align).
     path_specs: List[Tuple[str, List[str], str]] = []
@@ -746,24 +808,21 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
             leaf_name = pieces[-1]
             path_specs.append((formatted_entry, [leaf_name], run_col))

+    LOGGER.info(f"AOAI: Processed {len(path_specs)} path specifications from column mappings.")
     content: List[Dict[str, Any]] = []

     for _, row in input_data_df.iterrows():
         item_root: Dict[str, Any] = {}

+        # Track which dataframe columns have been processed via column_mapping
+        processed_cols: Set[str] = set()
+
         for _, rel_parts, df_col in path_specs:
             # Safely fetch value
             val = row.get(df_col, None)

             # Convert value to string to match schema's "type": "string" leaves.
-
-            if val is None:
-                str_val = ""
-            elif isinstance(val, (str, int, float, bool)):
-                str_val = str(val)
-            else:
-                # Lists / dicts / other -> string for now
-                str_val = str(val)
+            str_val = _convert_value_to_string(val)

             # Insert into nested dict
             cursor = item_root
@@ -776,8 +835,19 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
             leaf_key = rel_parts[-1]
             cursor[leaf_key] = str_val

+            # Mark this dataframe column as processed
+            processed_cols.add(df_col)
+
+        # Add any unmapped dataframe columns directly to item_root
+        for col_name in input_data_df.columns:
+            if col_name not in processed_cols:
+                val = row.get(col_name, None)
+                str_val = _convert_value_to_string(val)
+                item_root[col_name] = str_val
+
         content.append({WRAPPER_KEY: item_root})

+    LOGGER.info(f"AOAI: Generated {len(content)} content items for data source.")
     return {
         "type": "jsonl",
         "source": {
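Note: the new `_convert_value_to_string` helper changes how non-scalar cell values reach the data source. In 1.12.0 lists and dicts were passed through `str()`; 1.13.0 JSON-serializes them and only falls back to `str()` when serialization fails. A standalone rerun of the same conversion logic, for illustration:

    import json
    from typing import Any

    def convert_value_to_string(val: Any) -> str:
        # Mirrors the helper added in the hunk above.
        if val is None:
            return ""
        elif isinstance(val, (str, int, float, bool)):
            return str(val)
        else:
            try:  # Attempt to JSON serialize lists/dicts
                return json.dumps(val, ensure_ascii=False)
            except (TypeError, ValueError):
                return str(val)  # Fallback for unserializable objects

    print(convert_value_to_string(None))                        # ''
    print(convert_value_to_string(3.5))                         # '3.5'
    print(convert_value_to_string({"tool": "search", "k": 3}))  # '{"tool": "search", "k": 3}'
    print(convert_value_to_string({1, 2}))                      # not JSON-serializable -> '{1, 2}'

The other behavioral addition in the same hunks is that dataframe columns not covered by the column mapping are now appended to each item as well, converted with the same helper.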
@@ -812,6 +882,7 @@ def _begin_eval_run(
     :rtype: str
     """

+    LOGGER.info(f"AOAI: Creating eval run '{run_name}' for eval group {eval_group_id}...")
     data_source = _get_data_source(input_data_df, column_mapping)
     eval_run = client.evals.runs.create(
         eval_id=eval_group_id,
@@ -820,6 +891,7 @@ def _begin_eval_run(
         metadata={"sample_generation": "off", "file_format": "jsonl", "is_foundry_eval": "true"},
         # TODO decide if we want to add our own timeout value?
     )
+    LOGGER.info(f"AOAI: Eval run created successfully with ID: {eval_run.id}")
     return eval_run.id


@@ -856,8 +928,11 @@ def _wait_for_run_conclusion(
         if total_wait > max_wait_seconds:
             wait_interval -= total_wait - max_wait_seconds
         sleep(wait_interval)
+        iters += 1
         response = client.evals.runs.retrieve(eval_id=eval_group_id, run_id=eval_run_id)
+        LOGGER.info(f"AOAI: Polling iteration {iters}, status: {response.status}, total wait: {total_wait:.1f}s")
         if response.status not in ["queued", "in_progress"]:
+            LOGGER.info(f"AOAI: Eval run {eval_run_id} reached terminal status: {response.status}")
             return response
         if total_wait > max_wait_seconds:
             raise EvaluationException(

azure/ai/evaluation/_evaluate/_utils.py

@@ -7,7 +7,8 @@ import os
 import re
 import tempfile
 from pathlib import Path
-
+import time
+from typing import Any, Dict, List, NamedTuple, Optional, Union, cast
 import uuid
 import base64
 import math
@@ -25,7 +26,7 @@ from azure.ai.evaluation._constants import (
     Prefixes,
 )
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._model_configurations import AzureAIProject, EvaluationResult
 from azure.ai.evaluation._version import VERSION
 from azure.ai.evaluation._user_agent import UserAgentSingleton
 from azure.ai.evaluation._azure._clients import LiteMLClient
@@ -196,8 +197,14 @@ def _log_metrics_and_instance_results_onedp(
         )
     )

+    # TODO: type mis-match because Evaluation instance is assigned to EvaluationRun
+    evaluation_id = (
+        upload_run_response.name  # type: ignore[attr-defined]
+        if hasattr(upload_run_response, "name")
+        else upload_run_response.id
+    )
     update_run_response = client.update_evaluation_run(
-        name=
+        name=evaluation_id,
         evaluation=EvaluationUpload(
             display_name=evaluation_name,
             status="Completed",
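Note: the `evaluation_id` block above simply prefers a `name` attribute on the upload response and falls back to `id`. A minimal illustration of that preference order with stand-in objects (the real response types come from the SDK and are not shown here):

    from types import SimpleNamespace

    def resolve_evaluation_id(upload_run_response) -> str:
        # Same preference order as the hunk above: use .name when present, else .id.
        if hasattr(upload_run_response, "name"):
            return upload_run_response.name
        return upload_run_response.id

    print(resolve_evaluation_id(SimpleNamespace(name="eval-123", id="run-9")))  # eval-123
    print(resolve_evaluation_id(SimpleNamespace(id="run-9")))                   # run-9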

azure/ai/evaluation/_evaluator_definition.py (new file)

@@ -0,0 +1,76 @@
+from abc import ABC
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass, field
+
+
+@dataclass
+class EvaluatorMetric:
+    type: str = "ordinal"
+    desirable_direction: Optional[str] = None
+    min_value: Optional[float] = None
+    max_value: Optional[float] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        result = {"type": self.type}
+        if self.desirable_direction is not None:
+            result["desirable_direction"] = self.desirable_direction
+        if self.min_value is not None:
+            result["min_value"] = self.min_value
+        if self.max_value is not None:
+            result["max_value"] = self.max_value
+        return result
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "EvaluatorMetric":
+        return cls(
+            type=data.get("type", "ordinal"),
+            desirable_direction=data.get("desirable_direction"),
+            min_value=data.get("min_value"),
+            max_value=data.get("max_value"),
+        )
+
+
+@dataclass
+class ObjectParameterDescriptorWithRequired:
+    required: List[str] = field(default_factory=list)
+    type: str = "object"
+    properties: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {"required": self.required, "type": self.type, "properties": self.properties}
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ObjectParameterDescriptorWithRequired":
+        return cls(
+            required=data.get("required", []), type=data.get("type", "object"), properties=data.get("properties", {})
+        )
+
+
+class EvaluatorDefinition(ABC):
+    """Base class for evaluator definitions"""
+
+    def __init__(self):
+        self.init_parameters: ObjectParameterDescriptorWithRequired = ObjectParameterDescriptorWithRequired()
+        self.metrics: Dict[str, EvaluatorMetric] = {}
+        self.data_schema: ObjectParameterDescriptorWithRequired = ObjectParameterDescriptorWithRequired()
+        self.type: str = "unknown"
+
+    def to_dict(self) -> Dict[str, Any]:
+        result = {
+            "type": self.type,
+            "init_parameters": self.init_parameters.to_dict(),
+            "metrics": {k: v.to_dict() for k, v in self.metrics.items()},
+            "data_schema": self.data_schema.to_dict(),
+        }
+        return result
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "EvaluatorDefinition":
+        # Create a generic instance since specific subclasses are not defined
+        instance = cls.__new__(cls)
+        instance.__init__()
+
+        instance.init_parameters = ObjectParameterDescriptorWithRequired.from_dict(data.get("init_parameters", {}))
+        instance.metrics = {k: EvaluatorMetric.from_dict(v) for k, v in data.get("metrics", {}).items()}
+        instance.data_schema = ObjectParameterDescriptorWithRequired.from_dict(data.get("data_schema", {}))
+        return instance
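Note: the new module is plain dataclasses plus a dictionary round trip. A quick usage sketch; the field values are invented, and the private import path is taken from the file list above, so treat it as internal API:

    from azure.ai.evaluation._evaluator_definition import (
        EvaluatorMetric,
        ObjectParameterDescriptorWithRequired,
    )

    metric = EvaluatorMetric(type="continuous", desirable_direction="increase", min_value=0.0, max_value=1.0)
    payload = metric.to_dict()
    # {'type': 'continuous', 'desirable_direction': 'increase', 'min_value': 0.0, 'max_value': 1.0}
    assert EvaluatorMetric.from_dict(payload) == metric  # dataclass equality compares field values

    schema = ObjectParameterDescriptorWithRequired(
        required=["query"],
        properties={"query": {"type": "string"}, "response": {"type": "string"}},
    )
    assert ObjectParameterDescriptorWithRequired.from_dict(schema.to_dict()) == schema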

azure/ai/evaluation/_evaluators/_bleu/_bleu.py

@@ -46,6 +46,7 @@ class BleuScoreEvaluator(EvaluatorBase):
            https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

    .. admonition:: Example with Threshold:
+
        .. literalinclude:: ../samples/evaluation_samples_threshold.py
            :start-after: [START threshold_bleu_score_evaluator]
            :end-before: [END threshold_bleu_score_evaluator]

azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py

@@ -56,23 +56,6 @@ class CodeVulnerabilityEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
    :param kwargs: Additional arguments to pass to the evaluator.
    :type kwargs: Any

-    .. admonition:: Example:
-
-        .. literalinclude:: ../samples/evaluation_samples_evaluate.py
-            :start-after: [START code_vulnerability_evaluator]
-            :end-before: [END code_vulnerability_evaluator]
-            :language: python
-            :dedent: 8
-            :caption: Initialize and call CodeVulnerabilityEvaluator with a query and response using azure.ai.evaluation.AzureAIProject.
-
-        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
-            :start-after: [START code_vulnerability_evaluator]
-            :end-before: [END code_vulnerability_evaluator]
-            :language: python
-            :dedent: 8
-            :caption: Initialize and call CodeVulnerabilityEvaluator using Azure AI Project URL in following format
-                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
-
    .. note::

        If this evaluator is supplied to the `evaluate` function, the metric

azure/ai/evaluation/_evaluators/_common/_base_eval.py

@@ -5,6 +5,7 @@
 import inspect
 from abc import ABC, abstractmethod
 import json
+import copy
 from typing import (
     Any,
     Callable,
@@ -39,6 +40,8 @@ from azure.ai.evaluation._common._experimental import experimental

 from ._conversation_aggregators import GetAggregator, GetAggregatorType

+import copy
+
 P = ParamSpec("P")
 T = TypeVar("T")
 T_EvalValue = TypeVar("T_EvalValue")
@@ -111,6 +114,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
     _NOT_APPLICABLE_RESULT = "not applicable"
     _PASS_RESULT = "pass"
     _FAIL_RESULT = "fail"
+    _type = "azure_ai_evaluator"

     # ~~~ METHODS THAT ALMOST ALWAYS NEED TO BE OVERRIDDEN BY CHILDREN~~~

@@ -488,13 +492,17 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
        """
        tool_calls = []
        tool_results_map = {}
-
-
+
+        # Work on a deep copy to avoid modifying the original object
+        response_copy = copy.deepcopy(response)
+
+        if isinstance(response_copy, list):
+            for message in response_copy:
                # Extract tool calls from assistant messages
                if message.get("role") == "assistant" and isinstance(message.get("content"), list):
                    for content_item in message.get("content"):
                        if isinstance(content_item, dict) and content_item.get("type") == "tool_call":
-                            tool_calls.append(content_item)
+                            tool_calls.append(copy.deepcopy(content_item))

                # Extract tool results from tool messages
                elif message.get("role") == "tool" and message.get("tool_call_id"):
@@ -582,7 +590,11 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
        :rtype: Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]
        """
        # Convert inputs into list of evaluable inputs.
-
+        try:
+            eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
+        except Exception as e:
+            print(f"Error converting kwargs to eval_input_list: {e}")
+            raise e
        per_turn_results = []
        # Evaluate all inputs.
        for eval_input in eval_input_list: