azure-ai-evaluation 1.0.0b5__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation has been flagged as possibly problematic.
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +188 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +118 -0
- azure/ai/evaluation/_common/_experimental.py +4 -0
- azure/ai/evaluation/_common/math.py +62 -2
- azure/ai/evaluation/_common/rai_service.py +110 -50
- azure/ai/evaluation/_common/utils.py +50 -16
- azure/ai/evaluation/_constants.py +2 -0
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +13 -3
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +12 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -43
- azure/ai/evaluation/_evaluate/_evaluate.py +62 -131
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +2 -1
- azure/ai/evaluation/_evaluate/_utils.py +72 -38
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +16 -17
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +60 -29
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +88 -6
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +16 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +39 -10
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +58 -52
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +79 -34
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +73 -34
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +74 -33
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -34
- azure/ai/evaluation/_evaluators/_eci/_eci.py +28 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +57 -26
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +13 -15
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +68 -30
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +17 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +10 -8
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -2
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +10 -6
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +6 -2
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +57 -34
- azure/ai/evaluation/_evaluators/_qa/_qa.py +25 -37
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +63 -29
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +76 -161
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +24 -25
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +65 -67
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +26 -20
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +74 -40
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_http_utils.py +6 -4
- azure/ai/evaluation/_model_configurations.py +65 -14
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +17 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +57 -47
- azure/ai/evaluation/simulator/_constants.py +11 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +128 -7
- azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +16 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +12 -1
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +3 -1
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +48 -4
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -0
- azure/ai/evaluation/simulator/_simulator.py +54 -45
- azure/ai/evaluation/simulator/_utils.py +25 -7
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/METADATA +240 -327
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/RECORD +71 -68
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/top_level.txt +0 -0

azure/ai/evaluation/_evaluate/_evaluate.py

@@ -3,24 +3,23 @@
 # ---------------------------------------------------------
 import inspect
 import json
+import logging
 import os
 import re
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict, TypeVar, Union
 
 import pandas as pd
 from promptflow._sdk._constants import LINE_NUMBER
-from promptflow._sdk._errors import MissingAzurePackage, UserAuthenticationError, UploadInternalError
 from promptflow.client import PFClient
 from promptflow.entities import Run
 
-from azure.ai.evaluation._common.math import
+from azure.ai.evaluation._common.math import list_mean_nan_safe, apply_transform_nan_safe
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
 from .._constants import (
     CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
     EvaluationMetrics,
-    EvaluationRunProperties,
     Prefixes,
     _InternalEvaluationMetrics,
 )
@@ -35,6 +34,7 @@ from ._utils import (
 )
 
 TClient = TypeVar("TClient", ProxyClient, CodeClient)
+LOGGER = logging.getLogger(__name__)
 
 # For metrics (aggregates) whose metric names intentionally differ from their
 # originating column name, usually because the aggregation of the original value
@@ -69,10 +69,11 @@ def _aggregate_other_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[str, float]]:
             renamed_cols.append(col)
             new_col_name = metric_prefix + "." + METRIC_COLUMN_NAME_REPLACEMENTS[metric_name]
             col_with_numeric_values = pd.to_numeric(df[col], errors="coerce")
-
-
-
-
+            try:
+                metric_columns[new_col_name] = round(list_mean_nan_safe(col_with_numeric_values), 2)
+            except EvaluationException:  # only exception that can be cause is all NaN values
+                msg = f"All score evaluations are NaN/None for column {col}. No aggregation can be performed."
+                LOGGER.warning(msg)
 
     return renamed_cols, metric_columns
 
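
The new try/except guards the NaN-safe mean against columns where every score came back as NaN/None: instead of failing, the aggregate is skipped and a warning is logged. A minimal standalone sketch of that behaviour, assuming `list_mean_nan_safe` ignores NaN values and raises when nothing numeric remains (the helper below is illustrative, not the package's implementation):

```python
import math
from typing import List


def list_mean_nan_safe_sketch(values: List[float]) -> float:
    """Illustrative stand-in: mean over non-NaN values, error if every value is NaN."""
    clean = [v for v in values if not math.isnan(v)]
    if not clean:
        # The package raises EvaluationException here; ValueError keeps the sketch self-contained.
        raise ValueError("All score evaluations are NaN/None. No aggregation can be performed.")
    return sum(clean) / len(clean)


print(round(list_mean_nan_safe_sketch([4.0, float("nan"), 5.0]), 2))  # 4.5

try:
    list_mean_nan_safe_sketch([float("nan"), float("nan")])
except ValueError as exc:
    print(f"warning: {exc}")  # the real code logs this via LOGGER.warning
```
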
@@ -119,11 +120,15 @@ def _aggregate_content_safety_metrics(
     for col in content_safety_df.columns:
         defect_rate_name = col.replace("_score", "_defect_rate")
         col_with_numeric_values = pd.to_numeric(content_safety_df[col], errors="coerce")
-
-
-
-
-
+        try:
+            col_with_boolean_values = apply_transform_nan_safe(
+                col_with_numeric_values, lambda x: 1 if x >= CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT else 0
+            )
+            defect_rates[defect_rate_name] = round(list_mean_nan_safe(col_with_boolean_values), 2)
+        except EvaluationException:  # only exception that can be cause is all NaN values
+            msg = f"All score evaluations are NaN/None for column {col}. No aggregation can be performed."
+            LOGGER.warning(msg)
+
     return content_safety_cols, defect_rates
 
 
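
A content-safety defect rate is simply the share of rows whose severity score meets or exceeds the threshold, computed over the rows that actually produced a score. A small pandas sketch of that arithmetic; the threshold of 4 is an assumption, since the value of `CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT` is not shown in this diff:

```python
import pandas as pd

DEFECT_THRESHOLD = 4  # assumed stand-in for CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT

scores = pd.to_numeric(pd.Series([0, 2, 5, None, 7]), errors="coerce")
flags = scores.dropna().apply(lambda x: 1 if x >= DEFECT_THRESHOLD else 0)  # 1 = defect
print(round(flags.mean(), 2))  # 0.5 -> two of the four scored rows are at or above the threshold
```
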
@@ -153,10 +158,11 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[str, float]]:
     for col in label_df.columns:
         defect_rate_name = col.replace("_label", "_defect_rate")
         col_with_boolean_values = pd.to_numeric(label_df[col], errors="coerce")
-
-
-
-
+        try:
+            defect_rates[defect_rate_name] = round(list_mean_nan_safe(col_with_boolean_values), 2)
+        except EvaluationException:  # only exception that can be cause is all NaN values
+            msg = f"All score evaluations are NaN/None for column {col}. No aggregation can be performed."
+            LOGGER.warning(msg)
     return label_cols, defect_rates
 
 
@@ -193,6 +199,9 @@ def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dict[str, float]:
     # For rest of metrics, we will calculate mean
     df.drop(columns=handled_columns, inplace=True)
 
+    # NOTE: nan/None values don't count as as booleans, so boolean columns with
+    # nan/None values won't have a mean produced from them.
+    # This is different from label-based known evaluators, which have special handling.
     mean_value = df.mean(numeric_only=True)
     metrics = mean_value.to_dict()
     # Add defect rates back into metrics
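
The NOTE above comes down to pandas dtype behaviour: a pure True/False column is treated as numeric and gets a mean, but once a None/NaN appears the column becomes object dtype and `mean(numeric_only=True)` silently drops it. A quick illustration (not part of the package):

```python
import pandas as pd

df = pd.DataFrame({
    "clean_bool": [True, False, True],      # bool dtype -> included in the numeric mean
    "bool_with_none": [True, None, False],  # object dtype -> skipped by numeric_only=True
})
print(df.mean(numeric_only=True))  # only clean_bool appears, with mean ~0.67
```
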
@@ -287,7 +296,13 @@ def _validate_columns_for_evaluators(
         # Ignore the missing fields if "conversation" presents in the input data
         missing_inputs = []
     else:
-
+        optional_params = (
+            evaluator._OPTIONAL_PARAMS  # pylint: disable=protected-access
+            if hasattr(evaluator, "_OPTIONAL_PARAMS")
+            else []
+        )
+        excluded_params = set(new_df.columns).union(optional_params)
+        missing_inputs = [col for col in evaluator_params if col not in excluded_params]
 
     # If "conversation" is the only parameter and it is missing, keep it in the missing inputs
     # Otherwise, remove it from the missing inputs
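
In other words, a required evaluator input only counts as missing when it is neither a column of the (possibly target-augmented) data nor one of the evaluator's declared optional parameters. A hypothetical standalone version of that check, with names chosen purely for illustration:

```python
from typing import Iterable, List, Set


def find_missing_inputs(
    evaluator_params: Iterable[str],
    data_columns: Set[str],
    optional_params: Iterable[str] = (),
) -> List[str]:
    """Hypothetical helper mirroring the excluded_params logic in the hunk above."""
    excluded = set(data_columns).union(optional_params)
    return [param for param in evaluator_params if param not in excluded]


print(find_missing_inputs(["query", "response", "context"], {"query", "response"}, ["context"]))  # []
print(find_missing_inputs(["query", "response", "context"], {"query"}))  # ['response', 'context']
```
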
@@ -391,7 +406,7 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project
         )
 
         output_dir = output_path if os.path.isdir(output_path) else os.path.dirname(output_path)
-        if not os.path.exists(output_dir):
+        if output_dir and not os.path.exists(output_dir):
             msg = f"The output directory '{output_dir}' does not exist. Please create the directory manually."
             raise EvaluationException(
                 message=msg,
@@ -451,33 +466,14 @@ def _apply_target_to_data(
     :rtype: Tuple[pandas.DataFrame, List[str]]
     """
     _run_name = kwargs.get("_run_name")
-
-
-
-
-
-
-
-
-                properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
-                stream=True,
-                name=_run_name,
-            )
-    except (UserAuthenticationError, UploadInternalError) as ex:
-        if "Failed to upload run" in ex.message:
-            msg = (
-                "Failed to upload the target run to the cloud. "
-                "This may be caused by insufficient permission to access storage or other errors."
-            )
-            raise EvaluationException(
-                message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.FAILED_REMOTE_TRACKING,
-                blame=ErrorBlame.USER_ERROR,
-                tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
-            ) from ex
-
-        raise ex
+    with TargetRunContext():
+        run: Run = pf_client.run(
+            flow=target,
+            display_name=evaluation_name,
+            data=data,
+            stream=True,
+            name=_run_name,
+        )
 
     target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)
     # Remove input and output prefix
@@ -601,48 +597,14 @@ def evaluate(
     :return: Evaluation results.
     :rtype: ~azure.ai.evaluation.EvaluationResult
 
-
-
-    Evaluate API can be used as follows:
-
-    .. code-block:: python
-
-        from azure.ai.evaluation import evaluate, RelevanceEvaluator, CoherenceEvaluator
-
-
-        model_config = {
-            "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
-            "api_key": os.environ.get("AZURE_OPENAI_KEY"),
-            "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
-        }
-
-        coherence_eval = CoherenceEvaluator(model_config=model_config)
-        relevance_eval = RelevanceEvaluator(model_config=model_config)
-
-        path = "evaluate_test_data.jsonl"
-        result = evaluate(
-            data=path,
-            evaluators={
-                "coherence": coherence_eval,
-                "relevance": relevance_eval,
-            },
-            evaluator_config={
-                "coherence": {
-                    "column_mapping": {
-                        "response": "${data.response}",
-                        "query": "${data.query}",
-                    },
-                },
-                "relevance": {
-                    "column_mapping": {
-                        "response": "${data.response}",
-                        "context": "${data.context}",
-                        "query": "${data.query}",
-                    },
-                },
-            },
-        )
+    .. admonition:: Example:
 
+        .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+            :start-after: [START evaluate_method]
+            :end-before: [END evaluate_method]
+            :language: python
+            :dedent: 8
+            :caption: Run an evaluation on local data with Coherence and Relevance evaluators.
     """
     try:
         return _evaluate(
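
The inline example was moved out of the docstring and into a samples file pulled in via `literalinclude`. For readers of this diff, here is a runnable version of the example that was removed, with the Azure OpenAI environment variables and the data path as placeholders you would supply yourself:

```python
import os

from azure.ai.evaluation import evaluate, RelevanceEvaluator, CoherenceEvaluator

# Model configuration comes from environment variables; these are placeholders.
model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_KEY"),
    "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

coherence_eval = CoherenceEvaluator(model_config=model_config)
relevance_eval = RelevanceEvaluator(model_config=model_config)

result = evaluate(
    data="evaluate_test_data.jsonl",  # JSONL file with query/response/context columns
    evaluators={"coherence": coherence_eval, "relevance": relevance_eval},
    evaluator_config={
        "coherence": {"column_mapping": {"response": "${data.response}", "query": "${data.query}"}},
        "relevance": {
            "column_mapping": {
                "response": "${data.response}",
                "context": "${data.context}",
                "query": "${data.query}",
            }
        },
    },
)
print(result["metrics"])  # EvaluationResult also carries "rows" and "studio_url"
```
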
@@ -698,7 +660,7 @@ def _print_summary(per_evaluator_results: Dict[str, Any]) -> None:
     if output_dict:
         print("======= Combined Run Summary (Per Evaluator) =======\n")
         print(json.dumps(output_dict, indent=4))
-        print("\n
+        print("\n====================================================\n")
 
 
 def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
@@ -728,36 +690,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     if target is not None:
         _validate_columns_for_target(input_data_df, target)
 
-
-    try:
-        pf_client = PFClient(
-            config=(
-                {"trace.destination": _trace_destination_from_project_scope(azure_ai_project)}
-                if azure_ai_project
-                else None
-            ),
-            user_agent=USER_AGENT,
-        )
-        # pylint: disable=raise-missing-from
-    except MissingAzurePackage:
-        msg = (
-            "The required packages for remote tracking are missing.\n"
-            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
-        )
-
-        raise EvaluationException(  # pylint: disable=raise-missing-from
-            message=msg,
-            target=ErrorTarget.EVALUATE,
-            category=ErrorCategory.MISSING_PACKAGE,
-            blame=ErrorBlame.USER_ERROR,
-        )
-
-    trace_destination: Optional[str] = pf_client._config.get_trace_destination()  # pylint: disable=protected-access
-
-    # Handle the case where the customer manually run "pf config set trace.destination=none"
-    if trace_destination and trace_destination.lower() == "none":
-        trace_destination = None
-
+    pf_client = PFClient(user_agent=USER_AGENT)
     target_run: Optional[Run] = None
 
     # Create default configuration for evaluators that directly maps
@@ -831,11 +764,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
         # Ensure the absolute path is passed to pf.run, as relative path doesn't work with
         # multiple evaluators. If the path is already absolute, abspath will return the original path.
         data = os.path.abspath(data)
-
-        # A user reported intermittent errors when PFClient uploads evaluation runs to the cloud.
-        # The root cause is still unclear, but it seems related to a conflict between the async run uploader
-        # and the async batch run. As a quick mitigation, use a PFClient without a trace destination for batch runs.
-        per_evaluator_results = eval_batch_run(ProxyClient(PFClient(user_agent=USER_AGENT)), data=data)
+        per_evaluator_results = eval_batch_run(ProxyClient(pf_client), data=data)
     else:
         data = input_data_df
         per_evaluator_results = eval_batch_run(CodeClient(), data=input_data_df)
@@ -877,20 +806,22 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
     metrics = _aggregate_metrics(evaluators_result_df, evaluators)
     metrics.update(evaluators_metric)
-
-
-
-
-
-
-
+
+    # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
+    target_run = None
+    trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
+    studio_url = None
+    if trace_destination:
+        studio_url = _log_metrics_and_instance_results(
+            metrics, result_df, trace_destination, target_run, evaluation_name, **kwargs
+        )
 
     result_df_dict = result_df.to_dict("records")
     result: EvaluationResult = {"rows": result_df_dict, "metrics": metrics, "studio_url": studio_url}  # type: ignore
 
+    _print_summary(per_evaluator_results)
+
     if output_path:
         _write_output(output_path, result)
 
-    _print_summary(per_evaluator_results)
-
     return result
azure/ai/evaluation/_evaluate/_telemetry/__init__.py

@@ -123,7 +123,8 @@ def log_evaluate_activity(func: Callable[P, EvaluationResult]) -> Callable[P, EvaluationResult]:
             user_agent=USER_AGENT,
         )
 
-
+        trace_destination = pf_client._config.get_trace_destination()  # pylint: disable=protected-access
+        track_in_cloud = bool(trace_destination) if trace_destination != "none" else False
         evaluate_target = bool(kwargs.get("target", None))
         evaluator_config = bool(kwargs.get("evaluator_config", None))
         custom_dimensions: Dict[str, Union[str, bool]] = {
azure/ai/evaluation/_evaluate/_utils.py

@@ -7,12 +7,11 @@ import os
 import re
 import tempfile
 from pathlib import Path
-from typing import Any, Dict, NamedTuple, Optional,
+from typing import Any, Dict, NamedTuple, Optional, Union, cast
 import uuid
 import base64
 
 import pandas as pd
-from promptflow.client import PFClient
 from promptflow.entities import Run
 
 from azure.ai.evaluation._constants import (
@@ -21,9 +20,10 @@ from azure.ai.evaluation._constants import (
     EvaluationRunProperties,
     Prefixes,
 )
-from azure.ai.evaluation._evaluate._eval_run import EvalRun
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._version import VERSION
+from azure.ai.evaluation._azure._clients import LiteMLClient
 
 LOGGER = logging.getLogger(__name__)
 
@@ -46,6 +46,8 @@ def is_none(value) -> bool:
 def extract_workspace_triad_from_trace_provider(  # pylint: disable=name-too-long
     trace_provider: str,
 ) -> AzureMLWorkspace:
+    from promptflow._cli._utils import get_workspace_triad_from_local
+
     match = re.match(AZURE_WORKSPACE_REGEX_FORMAT, trace_provider)
     if not match or len(match.groups()) != 5:
         raise EvaluationException(
@@ -59,10 +61,20 @@ def extract_workspace_triad_from_trace_provider(  # pylint: disable=name-too-long
             category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.UNKNOWN,
         )
+
     subscription_id = match.group(1)
     resource_group_name = match.group(3)
     workspace_name = match.group(5)
-
+
+    # In theory this if statement should never evaluate to True, but we'll keep it here just in case
+    # for backwards compatibility with what the original code that depended on promptflow-azure did
+    if not (subscription_id and resource_group_name and workspace_name):
+        local = get_workspace_triad_from_local()
+        subscription_id = subscription_id or local.subscription_id or os.getenv("AZUREML_ARM_SUBSCRIPTION")
+        resource_group_name = resource_group_name or local.resource_group_name or os.getenv("AZUREML_ARM_RESOURCEGROUP")
+        workspace_name = workspace_name or local.workspace_name or os.getenv("AZUREML_ARM_WORKSPACE_NAME")
+
+    return AzureMLWorkspace(subscription_id or "", resource_group_name or "", workspace_name or "")
 
 
 def load_jsonl(path):
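
The function pulls the workspace triad (subscription, resource group, workspace) out of an `azureml://` trace destination string and expects the regex to yield five groups, with the triad at groups 1, 3, and 5. The exact `AZURE_WORKSPACE_REGEX_FORMAT` is not shown in this diff, so the pattern below is an assumption about the typical format; the class name is likewise an illustrative stand-in for the package's `AzureMLWorkspace` named tuple:

```python
import re
from typing import NamedTuple


class AzureMLWorkspaceSketch(NamedTuple):
    subscription_id: str
    resource_group_name: str
    workspace_name: str


# Assumed shape of an azureml trace destination; the real constant may differ,
# but it must produce 5 groups with the triad at groups 1, 3 and 5.
PATTERN = (
    r"^azureml:/+subscriptions/([^/]+)/(resourceGroups|resourcegroups)/([^/]+)"
    r"(/providers/Microsoft\.MachineLearningServices)?/workspaces/([^/]+)$"
)

destination = (
    "azureml://subscriptions/00000000-0000-0000-0000-000000000000"
    "/resourceGroups/my-rg/providers/Microsoft.MachineLearningServices/workspaces/my-workspace"
)
match = re.match(PATTERN, destination)
assert match is not None and len(match.groups()) == 5
print(AzureMLWorkspaceSketch(match.group(1), match.group(3), match.group(5)))
```
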
@@ -70,19 +82,6 @@ def load_jsonl(path):
         return [json.loads(line) for line in f.readlines()]
 
 
-def _azure_pf_client_and_triad(trace_destination) -> Tuple[PFClient, AzureMLWorkspace]:
-    from promptflow.azure._cli._utils import _get_azure_pf_client
-
-    ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
-    azure_pf_client = _get_azure_pf_client(
-        subscription_id=ws_triad.subscription_id,
-        resource_group=ws_triad.resource_group_name,
-        workspace_name=ws_triad.workspace_name,
-    )
-
-    return azure_pf_client, ws_triad
-
-
 def _store_multimodal_content(messages, tmpdir: str):
     # verify if images folder exists
     images_folder_path = os.path.join(tmpdir, "images")
@@ -92,23 +91,40 @@ def _store_multimodal_content(messages, tmpdir: str):
     for message in messages:
         if isinstance(message.get("content", []), list):
             for content in message.get("content", []):
-
-
-
-
-
+                process_message_content(content, images_folder_path)
+
+
+def process_message_content(content, images_folder_path):
+    if content.get("type", "") == "image_url":
+        image_url = content.get("image_url")
 
-
-
-        image_url["url"] = f"images/{image_file_name}"  # Replace the base64 URL with the file path
+        if not image_url or "url" not in image_url:
+            return None
 
-
-
+        url = image_url["url"]
+        if not url.startswith("data:image/"):
+            return None
 
-
-
-
-
+        match = re.search("data:image/([^;]+);", url)
+        if not match:
+            return None
+
+        ext = match.group(1)
+        # Extract the base64 string
+        base64image = image_url["url"].replace(f"data:image/{ext};base64,", "")
+
+        # Generate a unique filename
+        image_file_name = f"{str(uuid.uuid4())}.{ext}"
+        image_url["url"] = f"images/{image_file_name}"  # Replace the base64 URL with the file path
+
+        # Decode the base64 string to binary image data
+        image_data_binary = base64.b64decode(base64image)
+
+        # Write the binary image data to the file
+        image_file_path = os.path.join(images_folder_path, image_file_name)
+        with open(image_file_path, "wb") as f:
+            f.write(image_data_binary)
+    return None
 
 
 def _log_metrics_and_instance_results(
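
The new `process_message_content` helper swaps inline base64 `data:image/...` URLs for files on disk before the results are uploaded. A self-contained sketch of the same parsing steps on a dummy payload (the bytes below are not a real image):

```python
import base64
import os
import re
import tempfile
import uuid

# Dummy payload standing in for a real base64-encoded image.
payload = base64.b64encode(b"not-really-an-image").decode()
content = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{payload}"}}

url = content["image_url"]["url"]
match = re.search("data:image/([^;]+);", url)
assert match is not None
ext = match.group(1)  # "png"

raw = base64.b64decode(url.replace(f"data:image/{ext};base64,", ""))
file_name = f"{uuid.uuid4()}.{ext}"

with tempfile.TemporaryDirectory() as tmpdir:
    images_folder = os.path.join(tmpdir, "images")
    os.makedirs(images_folder, exist_ok=True)
    with open(os.path.join(images_folder, file_name), "wb") as f:
        f.write(raw)
    content["image_url"]["url"] = f"images/{file_name}"  # message now points at the file
    print(content["image_url"]["url"])
```
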
@@ -117,27 +133,37 @@ def _log_metrics_and_instance_results(
     trace_destination: Optional[str],
     run: Run,
     evaluation_name: Optional[str],
+    **kwargs,
 ) -> Optional[str]:
+    from azure.ai.evaluation._evaluate._eval_run import EvalRun
+
     if trace_destination is None:
         LOGGER.debug("Skip uploading evaluation results to AI Studio since no trace destination was provided.")
         return None
 
-
-
+    ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
+    management_client = LiteMLClient(
+        subscription_id=ws_triad.subscription_id,
+        resource_group=ws_triad.resource_group_name,
+        logger=LOGGER,
+        credential=kwargs.get("credential"),
+        # let the client automatically determine the credentials to use
+    )
+    tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
 
     # Adding line_number as index column this is needed by UI to form link to individual instance run
     instance_results["line_number"] = instance_results.index.values
 
     with EvalRun(
         run_name=run.name if run is not None else evaluation_name,
-        tracking_uri=tracking_uri,
+        tracking_uri=cast(str, tracking_uri),
         subscription_id=ws_triad.subscription_id,
         group_name=ws_triad.resource_group_name,
         workspace_name=ws_triad.workspace_name,
-
+        management_client=management_client,
         promptflow_run=run,
     ) as ev_run:
-        artifact_name = EvalRun.EVALUATION_ARTIFACT
+        artifact_name = EvalRun.EVALUATION_ARTIFACT
 
         with tempfile.TemporaryDirectory() as tmpdir:
             # storing multi_modal images if exists
@@ -164,9 +190,15 @@ def _log_metrics_and_instance_results(
             ev_run.write_properties_to_run_history(
                 properties={
                     EvaluationRunProperties.RUN_TYPE: "eval_run",
-                    EvaluationRunProperties.EVALUATION_RUN: "
+                    EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
+                    EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
                     "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
-
+                }
+            )
+        else:
+            ev_run.write_properties_to_run_history(
+                properties={
+                    EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
                 }
             )
 
@@ -211,6 +243,8 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
     with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
         json.dump(data_dict, f)
 
+    print(f'Evaluation results saved to "{p.resolve()}".\n')
+
 
 def _apply_column_mapping(
     source_df: pd.DataFrame, mapping_config: Optional[Dict[str, str]], inplace: bool = False
azure/ai/evaluation/_evaluators/_bleu/_bleu.py

@@ -26,31 +26,30 @@ class _AsyncBleuScoreEvaluator:
 
 class BleuScoreEvaluator:
     """
-
+    Calculate the BLEU score for a given response and ground truth.
 
     BLEU (Bilingual Evaluation Understudy) score is commonly used in natural language processing (NLP) and machine
-    translation. It is widely used in text summarization and text generation use cases.
-    generated text matches the reference text. The BLEU score ranges from 0 to 1, with higher scores indicating
-    better quality.
+    translation. It is widely used in text summarization and text generation use cases.
 
-
+    Use the BLEU score when you want to evaluate the similarity between the generated text and reference text,
+    especially in tasks such as machine translation or text summarization, where n-gram overlap is a significant
+    indicator of quality.
 
-
+    The BLEU score ranges from 0 to 1, with higher scores indicating better quality.
 
-
-        result = eval_fn(
-            response="Tokyo is the capital of Japan.",
-            ground_truth="The capital of Japan is Tokyo.")
+    .. admonition:: Example:
 
-
-
-
-
-
-
-        }
+        .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+            :start-after: [START bleu_score_evaluator]
+            :end-before: [END bleu_score_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call an BleuScoreEvaluator.
     """
 
+    id = "azureml://registries/azureml/models/Bleu-Score-Evaluator/versions/3"
+    """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+
     def __init__(self):
         self._async_evaluator = _AsyncBleuScoreEvaluator()
 
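
The inline snippet removed from this docstring showed the evaluator being called directly with a response and a ground truth; the example now lives in the referenced samples file. A reconstruction of that usage, assuming `BleuScoreEvaluator` is exported from the top-level `azure.ai.evaluation` package and returns a dictionary containing a `bleu_score` entry (both stated here as assumptions):

```python
from azure.ai.evaluation import BleuScoreEvaluator

bleu = BleuScoreEvaluator()
result = bleu(
    response="Tokyo is the capital of Japan.",
    ground_truth="The capital of Japan is Tokyo.",
)
# Expected to contain a bleu_score value between 0 and 1, higher meaning closer n-gram overlap.
print(result)
```
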