azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +118 -0
- azure/ai/evaluation/_common/rai_service.py +30 -21
- azure/ai/evaluation/_constants.py +19 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +2 -1
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +16 -43
- azure/ai/evaluation/_evaluate/_evaluate.py +76 -44
- azure/ai/evaluation/_evaluate/_utils.py +93 -34
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +46 -25
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +140 -5
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +61 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -1
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +40 -2
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +6 -43
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +2 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +2 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +2 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +2 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +61 -68
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +45 -23
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +55 -34
- azure/ai/evaluation/_evaluators/_qa/_qa.py +32 -27
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +44 -23
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +42 -82
- azure/ai/evaluation/_http_utils.py +6 -4
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +35 -16
- azure/ai/evaluation/simulator/_conversation/__init__.py +128 -7
- azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -1
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +1 -0
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +40 -0
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -0
- azure/ai/evaluation/simulator/_simulator.py +24 -13
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.2.0.dist-info}/METADATA +84 -15
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.2.0.dist-info}/RECORD +47 -41
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.2.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.2.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,9 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+import enum
 from typing import Literal
+from azure.ai.evaluation._common._experimental import experimental
 
 
 class EvaluationMetrics:
@@ -54,6 +56,23 @@ class EvaluationRunProperties:
 
     RUN_TYPE = "runType"
     EVALUATION_RUN = "_azureml.evaluation_run"
+    EVALUATION_SDK = "_azureml.evaluation_sdk_name"
+
+
+@experimental
+class _AggregationType(enum.Enum):
+    """Defines how numeric evaluation results should be aggregated
+    to produce a single value. Used by individual evaluators to combine per-turn results for
+    a conversation-based input. In general, wherever this enum is used, it is also possible
+    to directly assign the underlying aggregation function for more complex use cases.
+    The 'custom' value is generally not an acceptable input, and should only be used as an output
+    to indicate that a custom aggregation function has been injected."""
+
+    MEAN = "mean"
+    MAX = "max"
+    MIN = "min"
+    SUM = "sum"
+    CUSTOM = "custom"
 
 
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
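The new _AggregationType enum above only names the reduction. As a rough illustration (an assumption for demonstration, not the evaluators' internal code), the values map naturally onto Python's builtin aggregators when collapsing per-turn scores into one conversation-level score:

# Illustrative sketch only -- the mapping below is assumed for demonstration.
from statistics import mean

per_turn_scores = [3, 5, 4]  # e.g. one numeric result per conversation turn

aggregators = {
    "mean": mean,
    "max": max,
    "min": min,
    "sum": sum,
}

conversation_score = aggregators["mean"](per_turn_scores)  # -> 4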
@@ -5,5 +5,6 @@ from .eval_run_context import EvalRunContext
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
 from .target_run_context import TargetRunContext
+from .proxy_client import ProxyRun
 
-__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
+__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext", "ProxyRun"]
@@ -16,7 +16,7 @@ class TargetRunContext:
     :type upload_snapshot: bool
     """
 
-    def __init__(self, upload_snapshot: bool) -> None:
+    def __init__(self, upload_snapshot: bool = False) -> None:
         self._upload_snapshot = upload_snapshot
         self._original_cwd = os.getcwd()
 
@@ -22,29 +22,12 @@ from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
 from azure.core.exceptions import HttpResponseError
+from azure.storage.blob import BlobServiceClient
+from azure.ai.evaluation._azure._clients import LiteMLClient
 
 LOGGER = logging.getLogger(__name__)
 
 
-# Handle optional import. The azure libraries are only present if
-# promptflow-azure is installed.
-try:
-    from azure.ai.ml import MLClient
-    from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
-    from azure.ai.ml.entities._datastore.datastore import Datastore
-    from azure.storage.blob import BlobServiceClient
-except (ModuleNotFoundError, ImportError):
-    raise EvaluationException(  # pylint: disable=raise-missing-from
-        message=(
-            "The required packages for remote tracking are missing.\n"
-            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
-        ),
-        target=ErrorTarget.EVALUATE,
-        category=ErrorCategory.MISSING_PACKAGE,
-        blame=ErrorBlame.USER_ERROR,
-    )
-
-
 @dataclasses.dataclass
 class RunInfo:
     """
@@ -93,15 +76,16 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
     :type group_name: str
     :param workspace_name: The name of workspace/project used to track run.
     :type workspace_name: str
-    :param
-    :type
+    :param management_client: The trace destination string to parse the AI ML workspace blob store from.
+    :type management_client:
+        ~azure.ai.evaluation._promptflow.azure._lite_azure_management_client.LiteMLClient
     :param promptflow_run: The promptflow run used by the
+    :type promptflow_run: Optional[promptflow._sdk.entities.Run]
     """
 
     _MAX_RETRIES = 5
     _BACKOFF_FACTOR = 2
     _TIMEOUT = 5
-    _SCOPE = "https://management.azure.com/.default"
 
     EVALUATION_ARTIFACT = "instance_results.jsonl"
 
@@ -112,14 +96,14 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         subscription_id: str,
         group_name: str,
         workspace_name: str,
-
+        management_client: LiteMLClient,
         promptflow_run: Optional[Run] = None,
     ) -> None:
         self._tracking_uri: str = tracking_uri
         self._subscription_id: str = subscription_id
         self._resource_group_name: str = group_name
         self._workspace_name: str = workspace_name
-        self.
+        self._management_client: LiteMLClient = management_client
         self._is_promptflow_run: bool = promptflow_run is not None
         self._run_name = run_name
         self._promptflow_run = promptflow_run
@@ -184,7 +168,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         if self._promptflow_run is not None:
             self._info = RunInfo(
                 self._promptflow_run.name,
-                self._promptflow_run._experiment_name,  # pylint: disable=protected-access
+                self._promptflow_run._experiment_name or "",  # pylint: disable=protected-access
                 self._promptflow_run.name,
             )
         else:
@@ -310,12 +294,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         """
         return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"
 
-    def _get_token(self):
-
-        # is an optional dependency.
-        from promptflow.azure._utils._token_cache import ArmTokenCache  # pylint: disable=import-error,no-name-in-module
-
-        return ArmTokenCache().get_token(self._ml_client._credential)  # pylint: disable=protected-access
+    def _get_token(self) -> str:
+        return self._management_client.get_token()
 
     def request_with_retry(
         self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None
@@ -441,9 +421,12 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
             local_paths.append(local_file_path)
 
         # We will write the artifacts to the workspaceblobstore
-        datastore = self.
+        datastore = self._management_client.workspace_get_default_datastore(
+            self._workspace_name, include_credentials=True
+        )
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
-
+
+        svc_client = BlobServiceClient(account_url=account_url, credential=datastore.credential)
         try:
             for local, remote in zip(local_paths, remote_paths["paths"]):
                 blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
@@ -515,16 +498,6 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
 
-    def _get_datastore_credential(self, datastore: "Datastore"):
-        # Reference the logic in azure.ai.ml._artifact._artifact_utilities
-        # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103
-        credential = datastore.credentials
-        if isinstance(credential, AccountKeyConfiguration):
-            return credential.account_key
-        if hasattr(credential, "sas_token"):
-            return credential.sas_token
-        return self._ml_client.datastores._credential  # pylint: disable=protected-access
-
     def log_metric(self, key: str, value: float) -> None:
         """
         Log the metric to azure similar to how it is done by mlflow.
@@ -10,9 +10,9 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict, T
 
 import pandas as pd
 from promptflow._sdk._constants import LINE_NUMBER
-from promptflow._sdk._errors import UserAuthenticationError, UploadInternalError
 from promptflow.client import PFClient
 from promptflow.entities import Run
+from promptflow._sdk._configuration import Configuration
 
 from azure.ai.evaluation._common.math import list_mean_nan_safe, apply_transform_nan_safe
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
@@ -21,18 +21,19 @@ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarg
 from .._constants import (
     CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
     EvaluationMetrics,
-
+    DefaultOpenEncoding,
     Prefixes,
     _InternalEvaluationMetrics,
 )
 from .._model_configurations import AzureAIProject, EvaluationResult, EvaluatorConfig
 from .._user_agent import USER_AGENT
-from ._batch_run import EvalRunContext, CodeClient, ProxyClient, TargetRunContext
+from ._batch_run import EvalRunContext, CodeClient, ProxyClient, TargetRunContext, ProxyRun
 from ._utils import (
     _apply_column_mapping,
     _log_metrics_and_instance_results,
     _trace_destination_from_project_scope,
     _write_output,
+    DataLoaderFactory,
 )
 
 TClient = TypeVar("TClient", ProxyClient, CodeClient)
@@ -431,10 +432,11 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
     )
 
     try:
-
+        data_loader = DataLoaderFactory.get_loader(data)
+        initial_data_df = data_loader.load()
     except Exception as e:
         raise EvaluationException(
-            message=f"Unable to load data from '{data}'.
+            message=f"Unable to load data from '{data}'. Supported formats are JSONL and CSV. Detailed error: {e}.",
             target=ErrorTarget.EVALUATE,
             category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
@@ -446,7 +448,7 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
 def _apply_target_to_data(
     target: Callable,
     data: Union[str, os.PathLike],
-
+    batch_client: TClient,
     initial_data: pd.DataFrame,
     evaluation_name: Optional[str] = None,
     **kwargs,
@@ -456,10 +458,10 @@ def _apply_target_to_data(
 
     :param target: The function to be applied to data.
     :type target: Callable
-    :param data: The path to input jsonl file.
+    :param data: The path to input jsonl or csv file.
     :type data: Union[str, os.PathLike]
-    :param
-    :type
+    :param batch_client: The promptflow client to be used.
+    :type batch_client: PFClient
     :param initial_data: The data frame with the loaded data.
     :type initial_data: pd.DataFrame
     :param evaluation_name: The name of the evaluation.
@@ -468,35 +470,27 @@ def _apply_target_to_data(
     :rtype: Tuple[pandas.DataFrame, List[str]]
     """
     _run_name = kwargs.get("_run_name")
-
-
-
-
-
-
-
-
-            properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
-            stream=True,
-            name=_run_name,
-        )
-    except (UserAuthenticationError, UploadInternalError) as ex:
-        if "Failed to upload run" in ex.message:
-            msg = (
-                "Failed to upload the target run to the cloud. "
-                "This may be caused by insufficient permission to access storage or other errors."
-            )
-            raise EvaluationException(
-                message=msg,
-                target=ErrorTarget.EVALUATE,
-                category=ErrorCategory.FAILED_REMOTE_TRACKING,
-                blame=ErrorBlame.USER_ERROR,
-                tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
-            ) from ex
+    with TargetRunContext():
+        run: ProxyRun = batch_client.run(
+            flow=target,
+            display_name=evaluation_name,
+            data=data,
+            stream=True,
+            name=_run_name,
+        )
 
-
+    target_output: pd.DataFrame = batch_client.get_details(run, all_results=True)
+    run_summary = batch_client.get_run_summary(run)
 
-
+    if run_summary["completed_lines"] == 0:
+        msg = (f"Evaluation target failed to produce any results."
+               f" Please check the logs at {run_summary['log_path']} for more details about cause of failure.")
+        raise EvaluationException(
+            message=msg,
+            target=ErrorTarget.EVALUATE,
+            category=ErrorCategory.FAILED_EXECUTION,
+            blame=ErrorBlame.USER_ERROR,
+        )
     # Remove input and output prefix
     generated_columns = {
         col[len(Prefixes.OUTPUTS) :] for col in target_output.columns if col.startswith(Prefixes.OUTPUTS)
@@ -515,7 +509,7 @@ def _apply_target_to_data(
     # Concatenate output to input
     target_output = pd.concat([target_output, initial_data], axis=1)
 
-    return target_output, generated_columns, run
+    return target_output, generated_columns, run.run.result()
 
 
 def _process_column_mappings(
@@ -590,13 +584,14 @@ def evaluate(
     evaluator_config: Optional[Dict[str, EvaluatorConfig]] = None,
     azure_ai_project: Optional[AzureAIProject] = None,
     output_path: Optional[Union[str, os.PathLike]] = None,
+    fail_on_evaluator_errors: bool = False,
     **kwargs,
 ) -> EvaluationResult:
     """Evaluates target or data with built-in or custom evaluators. If both target and data are provided,
     data will be run through target function and then results will be evaluated.
 
     :keyword data: Path to the data to be evaluated or passed to target if target is set.
-
+        JSONL and CSV files are supported. `target` and `data` both cannot be None. Required.
     :paramtype data: str
     :keyword evaluators: Evaluators to be used for evaluation. It should be a dictionary with key as alias for evaluator
         and value as the evaluator function. Required.
@@ -615,6 +610,11 @@ def evaluate(
     :paramtype output_path: Optional[str]
     :keyword azure_ai_project: Logs evaluation results to AI Studio if set.
     :paramtype azure_ai_project: Optional[~azure.ai.evaluation.AzureAIProject]
+    :keyword fail_on_evaluator_errors: Whether or not the evaluation should cancel early with an EvaluationException
+        if ANY evaluator fails during their evaluation.
+        Defaults to false, which means that evaluations will continue regardless of failures.
+        If such failures occur, metrics may be missing, and evidence of failures can be found in the evaluation's logs.
+    :paramtype fail_on_evaluator_errors: bool
     :return: Evaluation results.
     :rtype: ~azure.ai.evaluation.EvaluationResult
 
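A minimal usage sketch of the new flag. The data file name, the choice of evaluator, and the required columns are placeholders for illustration, not values taken from this diff:

# Hypothetical example: with fail_on_evaluator_errors=True, any failed
# evaluator line raises EvaluationException instead of silently leaving
# gaps in the metrics.
from azure.ai.evaluation import evaluate, F1ScoreEvaluator

result = evaluate(
    data="qa_pairs.jsonl",                  # JSONL and CSV are both accepted as of 1.2.0
    evaluators={"f1": F1ScoreEvaluator()},
    fail_on_evaluator_errors=True,
)
print(result["metrics"])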
@@ -636,6 +636,7 @@ def evaluate(
             evaluator_config=evaluator_config,
             azure_ai_project=azure_ai_project,
             output_path=output_path,
+            fail_on_evaluator_errors=fail_on_evaluator_errors,
             **kwargs,
         )
     except Exception as e:
@@ -684,6 +685,16 @@ def _print_summary(per_evaluator_results: Dict[str, Any]) -> None:
     print("\n====================================================\n")
 
 
+def _print_fail_flag_warning() -> None:
+    print(
+        "Notice: fail_on_evaluator_errors is enabled. It is recommended that you disable "
+        + "this flag for evaluations on large datasets (loosely defined as more than 10 rows of inputs, "
+        + "or more than 4 evaluators). Using this flag on large datasets runs the risk of large runs failing "
+        + "without producing any outputs, since a single failure will cancel the entire run "
+        "when fail_on_evaluator_errors is enabled."
+    )
+
+
 def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     *,
     evaluators: Dict[str, Callable],
@@ -693,8 +704,11 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     evaluator_config: Optional[Dict[str, EvaluatorConfig]] = None,
     azure_ai_project: Optional[AzureAIProject] = None,
     output_path: Optional[Union[str, os.PathLike]] = None,
+    fail_on_evaluator_errors: bool = False,
     **kwargs,
 ) -> EvaluationResult:
+    if fail_on_evaluator_errors:
+        _print_fail_flag_warning()
     input_data_df = _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name)
 
     # Process evaluator config to replace ${target.} with ${data.}
@@ -711,6 +725,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     if target is not None:
         _validate_columns_for_target(input_data_df, target)
 
+    Configuration.get_instance().set_config("trace.destination", "none")
     pf_client = PFClient(user_agent=USER_AGENT)
     target_run: Optional[Run] = None
 
@@ -723,7 +738,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     target_generated_columns: Set[str] = set()
     if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
-            target, data, pf_client, input_data_df, evaluation_name, **kwargs
+            target, data, ProxyClient(pf_client), input_data_df, evaluation_name, **kwargs
         )
 
     for evaluator_name, mapping in column_mapping.items():
@@ -794,6 +809,10 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     evaluators_result_df = None
     evaluators_metric = {}
     for evaluator_name, evaluator_result in per_evaluator_results.items():
+        if fail_on_evaluator_errors and evaluator_result["run_summary"]["failed_lines"] > 0:
+            _print_summary(per_evaluator_results)
+            _turn_error_logs_into_exception(evaluator_result["run_summary"]["log_path"] + "/error.json")
+
         evaluator_result_df = evaluator_result["result"]
 
         # drop input columns
@@ -834,11 +853,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     studio_url = None
     if trace_destination:
         studio_url = _log_metrics_and_instance_results(
-            metrics,
-            result_df,
-            trace_destination,
-            target_run,
-            evaluation_name,
+            metrics, result_df, trace_destination, target_run, evaluation_name, **kwargs
         )
 
     result_df_dict = result_df.to_dict("records")
@@ -850,3 +865,20 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
     _write_output(output_path, result)
 
     return result
+
+
+def _turn_error_logs_into_exception(log_path: str) -> None:
+    """Produce an EvaluationException using the contents of the inputted
+    file as the error message.
+
+    :param log_path: The path to the error log file.
+    :type log_path: str
+    """
+    with open(log_path, "r", encoding=DefaultOpenEncoding.READ) as file:
+        error_message = file.read()
+    raise EvaluationException(
+        message=error_message,
+        target=ErrorTarget.EVALUATE,
+        category=ErrorCategory.FAILED_EXECUTION,
+        blame=ErrorBlame.UNKNOWN,
+    )
@@ -7,12 +7,11 @@ import os
 import re
 import tempfile
 from pathlib import Path
-from typing import Any, Dict, NamedTuple, Optional,
+from typing import Any, Dict, NamedTuple, Optional, Union, cast
 import uuid
 import base64
 
 import pandas as pd
-from promptflow.client import PFClient
 from promptflow.entities import Run
 
 from azure.ai.evaluation._constants import (
@@ -23,6 +22,8 @@ from azure.ai.evaluation._constants import (
 )
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._version import VERSION
+from azure.ai.evaluation._azure._clients import LiteMLClient
 
 LOGGER = logging.getLogger(__name__)
 
@@ -45,6 +46,8 @@ def is_none(value) -> bool:
 def extract_workspace_triad_from_trace_provider(  # pylint: disable=name-too-long
     trace_provider: str,
 ) -> AzureMLWorkspace:
+    from promptflow._cli._utils import get_workspace_triad_from_local
+
     match = re.match(AZURE_WORKSPACE_REGEX_FORMAT, trace_provider)
     if not match or len(match.groups()) != 5:
         raise EvaluationException(
@@ -58,10 +61,20 @@ def extract_workspace_triad_from_trace_provider( # pylint: disable=name-too-lon
             category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.UNKNOWN,
         )
+
     subscription_id = match.group(1)
     resource_group_name = match.group(3)
     workspace_name = match.group(5)
-
+
+    # In theory this if statement should never evaluate to True, but we'll keep it here just in case
+    # for backwards compatibility with what the original code that depended on promptflow-azure did
+    if not (subscription_id and resource_group_name and workspace_name):
+        local = get_workspace_triad_from_local()
+        subscription_id = subscription_id or local.subscription_id or os.getenv("AZUREML_ARM_SUBSCRIPTION")
+        resource_group_name = resource_group_name or local.resource_group_name or os.getenv("AZUREML_ARM_RESOURCEGROUP")
+        workspace_name = workspace_name or local.workspace_name or os.getenv("AZUREML_ARM_WORKSPACE_NAME")
+
+    return AzureMLWorkspace(subscription_id or "", resource_group_name or "", workspace_name or "")
 
 
 def load_jsonl(path):
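For orientation, the workspace triad is parsed out of a trace-destination URI of roughly the following shape. The URI below is a made-up placeholder; the authoritative pattern is the AZURE_WORKSPACE_REGEX_FORMAT constant used above.

# Hypothetical trace destination -- subscription, resource group, and workspace
# names are placeholders for illustration only.
trace_destination = (
    "azureml://subscriptions/00000000-0000-0000-0000-000000000000"
    "/resourceGroups/my-resource-group"
    "/providers/Microsoft.MachineLearningServices"
    "/workspaces/my-project"
)
# extract_workspace_triad_from_trace_provider(trace_destination) would yield an
# AzureMLWorkspace with those three values; with this change, any empty part now
# falls back to the local promptflow configuration or the AZUREML_ARM_* variables.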
@@ -69,19 +82,6 @@ def load_jsonl(path):
         return [json.loads(line) for line in f.readlines()]
 
 
-def _azure_pf_client_and_triad(trace_destination) -> Tuple[PFClient, AzureMLWorkspace]:
-    from promptflow.azure._cli._utils import _get_azure_pf_client
-
-    ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
-    azure_pf_client = _get_azure_pf_client(
-        subscription_id=ws_triad.subscription_id,
-        resource_group=ws_triad.resource_group_name,
-        workspace_name=ws_triad.workspace_name,
-    )
-
-    return azure_pf_client, ws_triad
-
-
 def _store_multimodal_content(messages, tmpdir: str):
     # verify if images folder exists
     images_folder_path = os.path.join(tmpdir, "images")
@@ -91,23 +91,40 @@ def _store_multimodal_content(messages, tmpdir: str):
     for message in messages:
         if isinstance(message.get("content", []), list):
             for content in message.get("content", []):
-
-
-
-
-
+                process_message_content(content, images_folder_path)
+
+
+def process_message_content(content, images_folder_path):
+    if content.get("type", "") == "image_url":
+        image_url = content.get("image_url")
+
+        if not image_url or "url" not in image_url:
+            return None
+
+        url = image_url["url"]
+        if not url.startswith("data:image/"):
+            return None
+
+        match = re.search("data:image/([^;]+);", url)
+        if not match:
+            return None
 
-
-
-
+        ext = match.group(1)
+        # Extract the base64 string
+        base64image = image_url["url"].replace(f"data:image/{ext};base64,", "")
 
-
-
+        # Generate a unique filename
+        image_file_name = f"{str(uuid.uuid4())}.{ext}"
+        image_url["url"] = f"images/{image_file_name}"  # Replace the base64 URL with the file path
 
-
-
-
-
+        # Decode the base64 string to binary image data
+        image_data_binary = base64.b64decode(base64image)
+
+        # Write the binary image data to the file
+        image_file_path = os.path.join(images_folder_path, image_file_name)
+        with open(image_file_path, "wb") as f:
+            f.write(image_data_binary)
+        return None
 
 
 def _log_metrics_and_instance_results(
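A sketch of the kind of message the new process_message_content helper handles. The payload below is a placeholder: the helper only decodes and writes the base64 bytes, it does not validate image contents.

import base64

fake_png_b64 = base64.b64encode(b"not-a-real-png").decode()  # placeholder payload

message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe the image."},
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{fake_png_b64}"}},
    ],
}
# After process_message_content(message["content"][1], images_folder_path) runs,
# the "url" value is rewritten to "images/<uuid>.png" and the decoded bytes are
# written to <images_folder_path>/<uuid>.png.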
@@ -116,6 +133,7 @@ def _log_metrics_and_instance_results(
     trace_destination: Optional[str],
     run: Run,
     evaluation_name: Optional[str],
+    **kwargs,
 ) -> Optional[str]:
     from azure.ai.evaluation._evaluate._eval_run import EvalRun
 
@@ -123,19 +141,26 @@ def _log_metrics_and_instance_results(
         LOGGER.debug("Skip uploading evaluation results to AI Studio since no trace destination was provided.")
         return None
 
-
-
+    ws_triad = extract_workspace_triad_from_trace_provider(trace_destination)
+    management_client = LiteMLClient(
+        subscription_id=ws_triad.subscription_id,
+        resource_group=ws_triad.resource_group_name,
+        logger=LOGGER,
+        credential=kwargs.get("credential"),
+        # let the client automatically determine the credentials to use
+    )
+    tracking_uri = management_client.workspace_get_info(ws_triad.workspace_name).ml_flow_tracking_uri
 
     # Adding line_number as index column this is needed by UI to form link to individual instance run
     instance_results["line_number"] = instance_results.index.values
 
     with EvalRun(
         run_name=run.name if run is not None else evaluation_name,
-        tracking_uri=tracking_uri,
+        tracking_uri=cast(str, tracking_uri),
         subscription_id=ws_triad.subscription_id,
         group_name=ws_triad.resource_group_name,
         workspace_name=ws_triad.workspace_name,
-
+        management_client=management_client,
         promptflow_run=run,
     ) as ev_run:
         artifact_name = EvalRun.EVALUATION_ARTIFACT
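Taken together, the hunks in this diff exercise only a small surface of the new internal LiteMLClient (added in _azure/_clients.py). A rough interface sketch inferred from the call sites shown above, not from the class itself; any details beyond those call sites are assumptions:

# Sketch inferred from call sites in this diff; names and shapes beyond the
# visible hunks are assumptions.
from typing import Protocol


class _LiteMLClientLike(Protocol):
    def get_token(self) -> str: ...
    # workspace_get_info(...).ml_flow_tracking_uri is read to build EvalRun
    def workspace_get_info(self, workspace_name: str): ...
    # returns a datastore with account_name, endpoint, container_name, credential
    def workspace_get_default_datastore(self, workspace_name: str, include_credentials: bool = False): ...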
@@ -166,9 +191,16 @@ def _log_metrics_and_instance_results(
                 properties={
                     EvaluationRunProperties.RUN_TYPE: "eval_run",
                     EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
+                    EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
                     "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
                 }
             )
+        else:
+            ev_run.write_properties_to_run_history(
+                properties={
+                    EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
+                }
+            )
 
         for metric_name, metric_value in metrics.items():
             ev_run.log_metric(metric_name, metric_value)
@@ -296,3 +328,30 @@ def set_event_loop_policy() -> None:
     # Reference: https://stackoverflow.com/questions/45600579/asyncio-event-loop-is-closed-when-getting-loop
     # On Windows seems to be a problem with EventLoopPolicy, use this snippet to work around it
     asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # type: ignore[attr-defined]
+
+
+class JSONLDataFileLoader:
+    def __init__(self, filename: Union[os.PathLike, str]):
+        self.filename = filename
+
+    def load(self) -> pd.DataFrame:
+        return pd.read_json(self.filename, lines=True)
+
+
+class CSVDataFileLoader:
+    def __init__(self, filename: Union[os.PathLike, str]):
+        self.filename = filename
+
+    def load(self) -> pd.DataFrame:
+        return pd.read_csv(self.filename)
+
+
+class DataLoaderFactory:
+    @staticmethod
+    def get_loader(filename: Union[os.PathLike, str]) -> Union[JSONLDataFileLoader, CSVDataFileLoader]:
+        filename_str = str(filename).lower()
+        if filename_str.endswith(".csv"):
+            return CSVDataFileLoader(filename)
+
+        # fallback to JSONL to maintain backward compatibility
+        return JSONLDataFileLoader(filename)