azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +9 -5
- azure/ai/evaluation/_common/constants.py +4 -2
- azure/ai/evaluation/_common/math.py +18 -0
- azure/ai/evaluation/_common/rai_service.py +54 -62
- azure/ai/evaluation/_common/utils.py +201 -16
- azure/ai/evaluation/_constants.py +12 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
- azure/ai/evaluation/_evaluate/_evaluate.py +161 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
- azure/ai/evaluation/_evaluate/_utils.py +44 -25
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +33 -79
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +331 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +76 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +97 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
- azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +30 -74
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +34 -80
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +35 -83
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
- azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +25 -28
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +23 -17
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +15 -90
- azure/ai/evaluation/_exceptions.py +9 -7
- azure/ai/evaluation/_http_utils.py +203 -132
- azure/ai/evaluation/_model_configurations.py +37 -9
- azure/ai/evaluation/{_evaluators/_chat/retrieval → _vendor}/__init__.py +0 -6
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
- azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -6
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
- azure/ai/evaluation/simulator/_simulator.py +127 -117
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/METADATA +129 -43
- azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt +50 -0
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD +106 -0
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py

@@ -2,6 +2,8 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import os
+import types
+from typing import Optional, Type, Union
 
 from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from promptflow._utils.user_agent_utils import ClientUserAgentUtil
@@ -30,12 +32,12 @@ class BatchRunContext:
     ]
     """
 
-    def __init__(self, client) -> None:
+    def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
 
-    def __enter__(self):
+    def __enter__(self) -> None:
         if isinstance(self.client, CodeClient):
             ClientUserAgentUtil.append_user_agent(USER_AGENT)
             inject_openai_api()
@@ -56,7 +58,12 @@ class BatchRunContext:
         # For addressing the issue of asyncio event loop closed on Windows
         set_event_loop_policy()
 
-    def __exit__(
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
         if isinstance(self.client, CodeClient):
             recover_openai_api()
 
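These batch_run_context.py hunks are mostly typing work: the client parameter gains a Union annotation and the context-manager protocol methods get explicit signatures. For reference, here is a minimal, self-contained sketch of the typed context-manager protocol being adopted; the ManagedScope class is illustrative and not part of the package.

import types
from typing import Optional, Type


class ManagedScope:
    """Illustrative context manager using the same typed protocol as BatchRunContext."""

    def __enter__(self) -> None:
        # Returning None means `with ManagedScope() as x` binds x to None,
        # matching how BatchRunContext.__enter__ is annotated above.
        print("setting up")

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[types.TracebackType],
    ) -> None:
        # A falsy return value lets any in-flight exception propagate to the caller.
        print("tearing down")


with ManagedScope():
    print("doing work")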
azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py

@@ -5,8 +5,9 @@ import inspect
 import json
 import logging
 import os
+from concurrent.futures import Future
 from pathlib import Path
-from typing import Callable, Dict, Optional, Union
+from typing import Any, Callable, Dict, Optional, Union, cast
 
 import pandas as pd
 from promptflow.contracts.types import AttrDict
@@ -22,25 +23,31 @@ LOGGER = logging.getLogger(__name__)
 
 class CodeRun:
     def __init__(
-        self,
-
+        self,
+        *,
+        run: Future,
+        input_data,
+        evaluator_name: Optional[str] = None,
+        aggregator: Callable[["CodeRun"], Future],
+        **kwargs,  # pylint: disable=unused-argument
+    ) -> None:
         self.run = run
         self.evaluator_name = evaluator_name if evaluator_name is not None else ""
         self.input_data = input_data
-        self.aggregated_metrics =
+        self.aggregated_metrics = aggregator(self)
 
-    def get_result_df(self, exclude_inputs=False):
+    def get_result_df(self, exclude_inputs: bool = False) -> pd.DataFrame:
         batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-        result_df = self.run.result(timeout=batch_run_timeout)
+        result_df = cast(pd.DataFrame, self.run.result(timeout=batch_run_timeout))
         if exclude_inputs:
             result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")])
         return result_df
 
-    def get_aggregated_metrics(self):
+    def get_aggregated_metrics(self) -> Dict[str, Any]:
         try:
             batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-            aggregated_metrics = (
-                self.aggregated_metrics.result(timeout=batch_run_timeout)
+            aggregated_metrics: Optional[Any] = (
+                cast(Dict, self.aggregated_metrics.result(timeout=batch_run_timeout))
                 if self.aggregated_metrics is not None
                 else None
             )
@@ -104,10 +111,10 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             verify_integrity=True,
         )
 
-
+    @staticmethod
+    def _calculate_aggregations(evaluator: Callable, run: CodeRun) -> Any:
         try:
             if _has_aggregator(evaluator):
-                aggregate_input = None
                 evaluator_output = run.get_result_df(exclude_inputs=True)
                 if len(evaluator_output.columns) == 1 and evaluator_output.columns[0] == "output":
                     aggregate_input = evaluator_output["output"].tolist()
@@ -152,21 +159,30 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             column_mapping=column_mapping,
             evaluator_name=evaluator_name,
         )
-
-
-
-
+
+        return CodeRun(
+            run=eval_future,
+            input_data=data,
+            evaluator_name=evaluator_name,
+            aggregator=lambda code_run: self._thread_pool.submit(
+                self._calculate_aggregations, evaluator=flow, run=code_run
+            ),
+        )
 
     def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df
 
-    def get_metrics(self, run: CodeRun) ->
+    def get_metrics(self, run: CodeRun) -> Dict[str, Any]:
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
         except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", run.evaluator_name, ex)
-            return
+            return {}
         return aggregated_metrics
+
+    def get_run_summary(self, run: CodeRun) -> Any:  # pylint: disable=unused-argument
+        # Not implemented
+        return None
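The CodeRun changes above swap a plain attribute assignment for an aggregator callable supplied at construction time: the new run passes itself to the callable, which submits the aggregation to CodeClient's thread pool and returns a Future that get_aggregated_metrics can later wait on with a timeout. A rough standalone sketch of that pattern follows, using invented Job and slow_sum names purely for illustration.

from concurrent.futures import Future, ThreadPoolExecutor
from typing import Callable, List


class Job:
    """Illustrative stand-in for CodeRun: aggregation is scheduled as soon as the job is built."""

    def __init__(self, *, data: List[int], aggregator: Callable[["Job"], Future]) -> None:
        self.data = data
        # The constructor hands itself to the aggregator, which schedules the work
        # on a pool and returns a Future to wait on later.
        self.aggregated = aggregator(self)


def slow_sum(job: Job) -> int:
    return sum(job.data)


pool = ThreadPoolExecutor(max_workers=1)
job = Job(data=[1, 2, 3], aggregator=lambda j: pool.submit(slow_sum, j))
print(job.aggregated.result(timeout=10))  # 6
pool.shutdown()

Keeping the result behind a Future is what lets the typed get_aggregated_metrics above apply PF_BATCH_TIMEOUT_SEC when it finally resolves the value.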
azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py

@@ -3,11 +3,12 @@
 # ---------------------------------------------------------
 import inspect
 import logging
+import math
 import os
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union
+from collections import OrderedDict
 
-import numpy as np
 import pandas as pd
 from promptflow.client import PFClient
 from promptflow.entities import Run
@@ -53,13 +54,27 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
     def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
         run: Run = proxy_run.run.result()
         result_df = self._pf_client.get_details(run, all_results=all_results)
-        result_df.replace("(Failed)",
+        result_df.replace("(Failed)", math.nan, inplace=True)
         return result_df
 
     def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         run: Run = proxy_run.run.result()
         return self._pf_client.get_metrics(run)
 
+    def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
+        run = proxy_run.run.result()
+
+        # pylint: disable=protected-access
+        return OrderedDict(
+            [
+                ("status", run.status),
+                ("duration", str(run._end_time - run._created_on)),
+                ("completed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")),
+                ("failed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")),
+                ("log_path", str(run._output_path)),
+            ]
+        )
+
     @staticmethod
     def _should_batch_use_async(flow):
         if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
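The new ProxyClient.get_run_summary assembles status, duration, completed and failed line counts, and a log path into an OrderedDict so the summary keeps a fixed key order when printed. The sketch below reproduces only that assembly step with invented stand-in values; the real method reads these fields from a promptflow Run object's private attributes.

from collections import OrderedDict
from datetime import datetime

# Invented stand-in values for illustration.
status = "Completed"
created_on = datetime(2024, 9, 1, 12, 0, 0)
end_time = datetime(2024, 9, 1, 12, 3, 30)
system_metrics = {"__pf__.lines.completed": 98, "__pf__.lines.failed": 2}

summary = OrderedDict(
    [
        ("status", status),
        ("duration", str(end_time - created_on)),
        ("completed_lines", system_metrics.get("__pf__.lines.completed", "NA")),
        ("failed_lines", system_metrics.get("__pf__.lines.failed", "NA")),
    ]
)
print(summary)  # keys print in the insertion order shown above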
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -10,10 +10,11 @@ import posixpath
 import time
 import types
 import uuid
-from typing import Any, Dict, Optional, Set, Type
+from typing import Any, Dict, List, Optional, Set, Type
 from urllib.parse import urlparse
 
 from promptflow._sdk.entities import Run
+from typing_extensions import Self
 
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_http_client
@@ -27,6 +28,7 @@ LOGGER = logging.getLogger(__name__)
 # Handle optional import. The azure libraries are only present if
 # promptflow-azure is installed.
 try:
+    from azure.ai.ml import MLClient
     from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
     from azure.ai.ml.entities._datastore.datastore import Datastore
     from azure.storage.blob import BlobServiceClient
@@ -121,8 +123,8 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         self._run_name = run_name
         self._promptflow_run = promptflow_run
         self._status = RunStatus.NOT_STARTED
-        self._url_base = None
-        self.
+        self._url_base: Optional[str] = None
+        self._info: Optional[RunInfo] = None
 
     @property
     def status(self) -> RunStatus:
@@ -134,6 +136,20 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         """
         return self._status
 
+    @property
+    def info(self) -> RunInfo:
+        if self._info is None:
+            msg = "Run info is missing"
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.UNKNOWN,
+                blame=ErrorBlame.UNKNOWN,
+            )
+
+        return self._info
+
     def _get_scope(self) -> str:
         """
         Return the scope information for the workspace.
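The info property added above guards an optional internal field: _info starts as None, is filled in once a run is actually started, and reading it too early now raises a descriptive EvaluationException instead of silently handing back None. A generic, self-contained sketch of the same pattern, with illustrative Tracker and RunInfo names and a plain RuntimeError standing in for the SDK's exception type:

from dataclasses import dataclass
from typing import Optional


@dataclass
class RunInfo:
    run_id: str


class Tracker:
    """Illustrative stand-in for EvalRun."""

    def __init__(self) -> None:
        # Populated later, once the run has actually been started.
        self._info: Optional[RunInfo] = None

    @property
    def info(self) -> RunInfo:
        # Fail loudly rather than letting None leak into callers.
        if self._info is None:
            raise RuntimeError("Run info is missing; start the run first")
        return self._info

    def start(self) -> None:
        self._info = RunInfo(run_id="local-run-123")


tracker = Tracker()
tracker.start()
print(tracker.info.run_id)  # local-run-123

The remaining _eval_run.py hunks below show the call sites that now populate self._info.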
azure/ai/evaluation/_evaluate/_eval_run.py (continued)

@@ -161,11 +177,11 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
             )
             self._url_base = None
             self._status = RunStatus.BROKEN
-            self.
+            self._info = RunInfo.generate(self._run_name)
         else:
             self._url_base = urlparse(self._tracking_uri).netloc
             if self._promptflow_run is not None:
-                self.
+                self._info = RunInfo(
                     self._promptflow_run.name,
                     self._promptflow_run._experiment_name,  # pylint: disable=protected-access
                     self._promptflow_run.name,
@@ -182,7 +198,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
                 body["run_name"] = self._run_name
                 response = self.request_with_retry(url=url, method="POST", json_dict=body)
                 if response.status_code != 200:
-                    self.
+                    self._info = RunInfo.generate(self._run_name)
                     LOGGER.warning(
                         "The run failed to start: %s: %s."
                         "The results will be saved locally, but will not be logged to Azure.",
@@ -192,7 +208,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
                     self._status = RunStatus.BROKEN
                 else:
                     parsed_response = response.json()
-                    self.
+                    self._info = RunInfo(
                         run_id=parsed_response["run"]["info"]["run_id"],
                         experiment_id=parsed_response["run"]["info"]["experiment_id"],
                         run_name=parsed_response["run"]["info"]["run_name"],
@@ -235,7 +251,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
             LOGGER.warning("Unable to terminate the run.")
         self._status = RunStatus.TERMINATED
 
-    def __enter__(self):
+    def __enter__(self) -> Self:
         """The Context Manager enter call.
 
         :return: The instance of the class.
@@ -249,7 +265,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         exc_type: Optional[Type[BaseException]],
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
-    ) ->
+    ) -> None:
         """The context manager exit call.
 
         :param exc_type: The exception type
@@ -408,7 +424,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
             return
         # First we will list the files and the appropriate remote paths for them.
         root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
-        remote_paths = {"paths": []}
+        remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
         local_paths = []
         # Go over the artifact folder and upload all artifacts.
         for root, _, filenames in os.walk(artifact_folder):