azure-ai-evaluation 1.0.0__py3-none-any.whl → 1.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
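In the hunks below, `-` lines come from 1.0.0 and `+` lines from 1.0.0b2, following standard unified-diff conventions. If you want to reproduce a listing like this yourself, here is a minimal sketch using only the Python standard library; the wheel filenames are assumptions, and both wheels must first be fetched (for example with `pip download azure-ai-evaluation==1.0.0 --no-deps`):

# Minimal sketch: print a unified diff of the text files inside two wheels.
# Wheels are ordinary zip archives; the filenames below are illustrative.
import difflib
import zipfile

def wheel_texts(path):
    """Return {member name: decoded text} for text-like members of a wheel."""
    with zipfile.ZipFile(path) as zf:
        return {
            name: zf.read(name).decode("utf-8", errors="replace")
            for name in zf.namelist()
            if name.endswith((".py", ".txt", ".prompty", ".jsonl"))
        }

old = wheel_texts("azure_ai_evaluation-1.0.0-py3-none-any.whl")
new = wheel_texts("azure_ai_evaluation-1.0.0b2-py3-none-any.whl")
for name in sorted(old.keys() | new.keys()):
    diff = difflib.unified_diff(
        old.get(name, "").splitlines(),
        new.get(name, "").splitlines(),
        fromfile=f"1.0.0/{name}",
        tofile=f"1.0.0b2/{name}",
        lineterm="",
    )
    for line in diff:
        print(line)

Because wheels are plain zip archives, no packaging-specific tooling is needed to unpack and compare them.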
- azure/ai/evaluation/__init__.py +5 -31
- azure/ai/evaluation/_common/constants.py +2 -9
- azure/ai/evaluation/_common/rai_service.py +120 -300
- azure/ai/evaluation/_common/utils.py +23 -381
- azure/ai/evaluation/_constants.py +6 -19
- azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/__init__.py +2 -3
- azure/ai/evaluation/_evaluate/{_batch_run/eval_run_context.py → _batch_run_client/batch_run_context.py} +7 -23
- azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py +17 -33
- azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py +4 -32
- azure/ai/evaluation/_evaluate/_eval_run.py +24 -81
- azure/ai/evaluation/_evaluate/_evaluate.py +239 -393
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +17 -17
- azure/ai/evaluation/_evaluate/_utils.py +28 -82
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +18 -17
- azure/ai/evaluation/_evaluators/{_retrieval → _chat}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_chat/_chat.py +357 -0
- azure/ai/evaluation/_evaluators/{_service_groundedness → _chat/retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +157 -0
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +88 -78
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +39 -76
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +4 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +67 -105
- azure/ai/evaluation/_evaluators/{_multimodal/_content_safety_multimodal_base.py → _content_safety/_content_safety_base.py} +34 -24
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +301 -0
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +54 -105
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +52 -99
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +52 -101
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +51 -101
- azure/ai/evaluation/_evaluators/_eci/_eci.py +54 -44
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +19 -34
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +89 -76
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +41 -66
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +16 -14
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +87 -113
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +27 -20
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +80 -89
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +30 -23
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +96 -84
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +47 -78
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +27 -26
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +38 -53
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +5 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +105 -91
- azure/ai/evaluation/_exceptions.py +7 -28
- azure/ai/evaluation/_http_utils.py +132 -203
- azure/ai/evaluation/_model_configurations.py +8 -104
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +1 -2
- azure/ai/evaluation/simulator/_adversarial_scenario.py +1 -20
- azure/ai/evaluation/simulator/_adversarial_simulator.py +92 -111
- azure/ai/evaluation/simulator/_constants.py +1 -11
- azure/ai/evaluation/simulator/_conversation/__init__.py +12 -13
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +67 -33
- azure/ai/evaluation/simulator/_helpers/__init__.py +2 -1
- azure/ai/evaluation/{_common → simulator/_helpers}/_experimental.py +9 -24
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +5 -26
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +94 -107
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +22 -70
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +11 -28
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +4 -8
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +24 -68
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +10 -6
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +5 -6
- azure/ai/evaluation/simulator/_simulator.py +207 -277
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- azure/ai/evaluation/simulator/_utils.py +13 -31
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +449 -0
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +99 -0
- {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_common/math.py +0 -89
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -46
- azure/ai/evaluation/_evaluators/_common/__init__.py +0 -13
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -344
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -88
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -133
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -113
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -99
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -112
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -93
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -148
- azure/ai/evaluation/_vendor/__init__.py +0 -3
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -14
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -328
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -63
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -63
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -53
- azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
- azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -1150
- azure_ai_evaluation-1.0.0.dist-info/METADATA +0 -595
- azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +0 -70
- azure_ai_evaluation-1.0.0.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0
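Several modules present in 1.0.0 (for example `azure/ai/evaluation/_common/math.py` and the vendored `rouge_score` package) do not exist in 1.0.0b2 at all, so code written against one version can fail on the other. A quick way to confirm which release an environment actually contains, using only the standard library:

from importlib.metadata import PackageNotFoundError, version

try:
    print(version("azure-ai-evaluation"))  # e.g. "1.0.0" or "1.0.0b2"
except PackageNotFoundError:
    print("azure-ai-evaluation is not installed")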
azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py

@@ -5,9 +5,8 @@ import inspect
 import json
 import logging
 import os
-from concurrent.futures import Future
 from pathlib import Path
-from typing import
+from typing import Callable, Dict, Optional, Union

 import pandas as pd
 from promptflow.contracts.types import AttrDict
@@ -23,31 +22,25 @@ LOGGER = logging.getLogger(__name__)

 class CodeRun:
     def __init__(
-        self,
-        *,
-        run: Future,
-        input_data,
-        evaluator_name: Optional[str] = None,
-        aggregator: Callable[["CodeRun"], Future],
-        **kwargs,  # pylint: disable=unused-argument
-    ) -> None:
+        self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs  # pylint: disable=unused-argument
+    ):
         self.run = run
         self.evaluator_name = evaluator_name if evaluator_name is not None else ""
         self.input_data = input_data
-        self.aggregated_metrics = aggregator(self)
+        self.aggregated_metrics = aggregated_metrics

-    def get_result_df(self, exclude_inputs: bool = False) -> pd.DataFrame:
+    def get_result_df(self, exclude_inputs=False):
         batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-        result_df =
+        result_df = self.run.result(timeout=batch_run_timeout)
         if exclude_inputs:
             result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")])
         return result_df

-    def get_aggregated_metrics(self) -> Dict[str, Any]:
+    def get_aggregated_metrics(self):
         try:
             batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-            aggregated_metrics
-
+            aggregated_metrics = (
+                self.aggregated_metrics.result(timeout=batch_run_timeout)
                 if self.aggregated_metrics is not None
                 else None
             )
@@ -111,10 +104,10 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             verify_integrity=True,
         )

-    @staticmethod
-    def _calculate_aggregations(evaluator: Callable, run: CodeRun) -> Any:
+    def _calculate_aggregations(self, evaluator, run):
         try:
             if _has_aggregator(evaluator):
+                aggregate_input = None
                 evaluator_output = run.get_result_df(exclude_inputs=True)
                 if len(evaluator_output.columns) == 1 and evaluator_output.columns[0] == "output":
                     aggregate_input = evaluator_output["output"].tolist()
@@ -159,30 +152,21 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             column_mapping=column_mapping,
             evaluator_name=evaluator_name,
         )
-
-        return CodeRun(
-            run=eval_future,
-            input_data=data,
-            evaluator_name=evaluator_name,
-            aggregator=lambda code_run: self._thread_pool.submit(
-                self._calculate_aggregations, evaluator=flow, run=code_run
-            ),
-        )
+        run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
+        aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
+        run.aggregated_metrics = aggregation_future
+        return run

     def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df

-    def get_metrics(self, run: CodeRun) ->
+    def get_metrics(self, run: CodeRun) -> Optional[None]:
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
         except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", run.evaluator_name, ex)
-            return
+            return None
         return aggregated_metrics
-
-    def get_run_summary(self, run: CodeRun) -> Any:  # pylint: disable=unused-argument
-        # Not implemented
-        return None
azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py

@@ -1,17 +1,13 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-
-# pylint: disable=protected-access
-
 import inspect
 import logging
-import math
 import os
-from collections import OrderedDict
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union

+import numpy as np
 import pandas as pd
 from promptflow.client import PFClient
 from promptflow.entities import Run
@@ -40,7 +36,7 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         **kwargs
     ) -> ProxyRun:
         flow_to_run = flow
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
+        if hasattr(flow, "_to_async"):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access

         batch_use_async = self._should_batch_use_async(flow_to_run)
@@ -57,40 +53,16 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
     def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
         run: Run = proxy_run.run.result()
         result_df = self._pf_client.get_details(run, all_results=all_results)
-        result_df.replace("(Failed)", math.nan, inplace=True)
+        result_df.replace("(Failed)", np.nan, inplace=True)
         return result_df

     def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         run: Run = proxy_run.run.result()
         return self._pf_client.get_metrics(run)

-    def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run = proxy_run.run.result()
-
-        # pylint: disable=protected-access
-        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
-        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
-
-        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
-        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
-            status = "Completed with Errors"
-        else:
-            status = run.status
-
-        # Return the ordered dictionary with the updated status
-        return OrderedDict(
-            [
-                ("status", status),
-                ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines", completed_lines),
-                ("failed_lines", failed_lines),
-                ("log_path", str(run._output_path)),
-            ]
-        )
-
     @staticmethod
     def _should_batch_use_async(flow):
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
+        if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
             if inspect.iscoroutinefunction(flow):
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -10,18 +10,16 @@ import posixpath
 import time
 import types
 import uuid
-from typing import Any, Dict,
+from typing import Any, Dict, Optional, Set, Type
 from urllib.parse import urlparse

 from promptflow._sdk.entities import Run
-from typing_extensions import Self

 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
-from azure.core.exceptions import HttpResponseError

 LOGGER = logging.getLogger(__name__)
@@ -29,20 +27,18 @@ LOGGER = logging.getLogger(__name__)
 # Handle optional import. The azure libraries are only present if
 # promptflow-azure is installed.
 try:
-    from azure.ai.ml import MLClient
     from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
     from azure.ai.ml.entities._datastore.datastore import Datastore
     from azure.storage.blob import BlobServiceClient
 except (ModuleNotFoundError, ImportError):
-
-
-
-
-
-
-
-
-    )
+    # If the above mentioned modules cannot be imported, we are running
+    # in local mode and MLClient in the constructor will be None, so
+    # we will not arrive to Azure-dependent code.
+
+    # We are logging the import failure only if debug logging level is set because:
+    # - If the project configuration was not provided this import is not needed.
+    # - If the project configuration was provided, the error will be raised by PFClient.
+    LOGGER.debug("promptflow.azure is not installed.")


 @dataclasses.dataclass
@@ -104,6 +100,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
     _SCOPE = "https://management.azure.com/.default"

     EVALUATION_ARTIFACT = "instance_results.jsonl"
+    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"

     def __init__(
         self,
@@ -124,8 +121,8 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         self._run_name = run_name
         self._promptflow_run = promptflow_run
         self._status = RunStatus.NOT_STARTED
-        self._url_base: Optional[str] = None
-        self._info: Optional[RunInfo] = None
+        self._url_base = None
+        self.info = None

     @property
     def status(self) -> RunStatus:
@@ -137,20 +134,6 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         """
         return self._status

-    @property
-    def info(self) -> RunInfo:
-        if self._info is None:
-            msg = "Run info is missing"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVAL_RUN,
-                category=ErrorCategory.UNKNOWN,
-                blame=ErrorBlame.UNKNOWN,
-            )
-
-        return self._info
-
     def _get_scope(self) -> str:
         """
         Return the scope information for the workspace.
@@ -178,11 +161,11 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             )
             self._url_base = None
             self._status = RunStatus.BROKEN
-            self._info = RunInfo.generate(self._run_name)
+            self.info = RunInfo.generate(self._run_name)
         else:
             self._url_base = urlparse(self._tracking_uri).netloc
             if self._promptflow_run is not None:
-                self._info = RunInfo(
+                self.info = RunInfo(
                     self._promptflow_run.name,
                     self._promptflow_run._experiment_name,  # pylint: disable=protected-access
                     self._promptflow_run.name,
@@ -199,7 +182,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             body["run_name"] = self._run_name
         response = self.request_with_retry(url=url, method="POST", json_dict=body)
         if response.status_code != 200:
-            self._info = RunInfo.generate(self._run_name)
+            self.info = RunInfo.generate(self._run_name)
             LOGGER.warning(
                 "The run failed to start: %s: %s."
                 "The results will be saved locally, but will not be logged to Azure.",
@@ -209,7 +192,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             self._status = RunStatus.BROKEN
         else:
             parsed_response = response.json()
-            self._info = RunInfo(
+            self.info = RunInfo(
                 run_id=parsed_response["run"]["info"]["run_id"],
                 experiment_id=parsed_response["run"]["info"]["experiment_id"],
                 run_name=parsed_response["run"]["info"]["run_name"],
@@ -252,7 +235,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             LOGGER.warning("Unable to terminate the run.")
         self._status = RunStatus.TERMINATED

-    def __enter__(self) -> Self:
+    def __enter__(self):
         """The Context Manager enter call.

         :return: The instance of the class.
@@ -266,7 +249,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         exc_type: Optional[Type[BaseException]],
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
-    ) ->
+    ) -> Optional[bool]:
         """The context manager exit call.

         :param exc_type: The exception type
@@ -413,7 +396,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         """
         if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
-        # Check if artifact directory is empty or does not exist.
+        # Check if artifact dirrectory is empty or does not exist.
         if not os.path.isdir(artifact_folder):
             LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
             return
@@ -425,7 +408,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             return
         # First we will list the files and the appropriate remote paths for them.
         root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
-        remote_paths
+        remote_paths = {"paths": []}
         local_paths = []
         # Go over the artifact folder and upload all artifacts.
         for root, _, filenames in os.walk(artifact_folder):
@@ -444,32 +427,15 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-        try:
-            for local, remote in zip(local_paths, remote_paths["paths"]):
-                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-                with open(local, "rb") as fp:
-                    blob_client.upload_blob(fp, overwrite=True)
-        except HttpResponseError as ex:
-            if ex.status_code == 403:
-                msg = (
-                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
-                    " Please ensure that the necessary access rights are granted."
-                )
-                raise EvaluationException(
-                    message=msg,
-                    target=ErrorTarget.EVAL_RUN,
-                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
-                    blame=ErrorBlame.USER_ERROR,
-                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
-                ) from ex
-
-            raise ex
+        for local, remote in zip(local_paths, remote_paths["paths"]):
+            blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+            with open(local, "rb") as fp:
+                blob_client.upload_blob(fp, overwrite=True)

         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.
         if self._is_promptflow_run:
             return
-
         url = (
             f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
             f"/resourceGroups/{self._resource_group_name}/providers/"
@@ -492,29 +458,6 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         if response.status_code != 200:
             self._log_warning("register artifact", response)

-        # register artifacts for images if exists in image folder
-        try:
-            for remote_path in remote_paths["paths"]:
-                remote_file_path = remote_path["path"]
-                if "images" in os.path.normpath(remote_file_path).split(os.sep):
-                    response = self.request_with_retry(
-                        url=url,
-                        method="POST",
-                        json_dict={
-                            "origin": "ExperimentRun",
-                            "container": f"dcid.{self.info.run_id}",
-                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
-                            "dataPath": {
-                                "dataStoreName": datastore.name,
-                                "relativePath": remote_file_path,
-                            },
-                        },
-                    )
-                    if response.status_code != 200:
-                        self._log_warning("register image artifact", response)
-        except Exception as ex:  # pylint: disable=broad-exception-caught
-            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
-
     def _get_datastore_credential(self, datastore: "Datastore"):
         # Reference the logic in azure.ai.ml._artifact._artifact_utilities
         # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103