PyPI - azure-ai-evaluation - Versions diffs - 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (150) hide show

azure/ai/evaluation/_common/utils.py CHANGED Viewed

@@ -1,15 +1,17 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+import os
+import posixpath
 import re
 import math
 import threading
 from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin
 import nltk
+from azure.storage.blob import ContainerClient
 from typing_extensions import NotRequired, Required, TypeGuard
-from promptflow.core._errors import MissingRequiredPackage
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import (
@@ -124,10 +126,25 @@ def construct_prompty_model_config(
     return prompty_model_config
+def is_onedp_project(azure_ai_project: AzureAIProject) -> bool:
+    """Check if the Azure AI project is an OneDP project.
+    :param azure_ai_project: The scope of the Azure AI project.
+    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :return: True if the Azure AI project is an OneDP project, False otherwise.
+    :rtype: bool
+    """
+    if isinstance(azure_ai_project, str):
+        return True
+    return False
 def validate_azure_ai_project(o: object) -> AzureAIProject:
     fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}
+    # TODO : Add regex check for malformed project uri
+    if is_onedp_project(o):
+        return o
     if not isinstance(o, dict):
         msg = "The 'azure_ai_project' parameter must be a dictionary."
         raise EvaluationException(
@@ -463,3 +480,64 @@ def validate_conversation(conversation):
             "User and assistant role expected as the only role in each message.",
             ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
         )
+def upload(path: str, container_client: ContainerClient, logger=None):
+    """Upload files or directories to Azure Blob Storage using a container client.
+    This function uploads a file or all files in a directory (recursively) to Azure Blob Storage.
+    When uploading a directory, the relative path structure is preserved in the blob container.
+    :param path: The local path to a file or directory to upload
+    :type path: str
+    :param container_client: The Azure Blob Container client to use for uploading
+    :type container_client: azure.storage.blob.ContainerClient
+    :param logger: Optional logger for debug output, defaults to None
+    :type logger: logging.Logger, optional
+    :raises EvaluationException: If the path doesn't exist or errors occur during upload
+    """
+    if not os.path.isdir(path) and not os.path.isfile(path):
+        raise EvaluationException(
+            message=f"Path '{path}' is not a directory or a file",
+            internal_message=f"Path '{path}' is not a directory or a file",
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.SYSTEM_ERROR,
+        )
+    remote_paths = []
+    local_paths = []
+    if os.path.isdir(path):
+        for (root, _, filenames) in os.walk(path):
+            upload_path = ""
+            if root != path:
+                rel_path = os.path.relpath(root, path)
+                upload_path = posixpath.join(rel_path)
+            for f in filenames:
+                remote_file_path = posixpath.join(upload_path, f)
+                remote_paths.append(remote_file_path)
+                local_file_path = os.path.join(root, f)
+                local_paths.append(local_file_path)
+    if os.path.isfile(path):
+        remote_paths = [os.path.basename(path)]
+        local_paths = [path]
+    try:
+        # Open the file in binary read mode
+        for local, remote in zip(local_paths, remote_paths):
+            with open(local, "rb") as data:
+                # Upload the file to Azure Blob Storage
+                container_client.upload_blob(data=data, name=remote)
+            if logger:
+                logger.debug(f"File '{local}' uploaded successfully")
+    except Exception as e:
+        raise EvaluationException(
+            message=f"Error uploading file: {e}",
+            internal_message=f"Error uploading file: {e}",
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.UPLOAD_ERROR,
+            blame=ErrorBlame.SYSTEM_ERROR,
+        )

azure/ai/evaluation/_constants.py CHANGED Viewed

@@ -62,6 +62,8 @@ class EvaluationRunProperties:
     RUN_TYPE = "runType"
     EVALUATION_RUN = "_azureml.evaluation_run"
     EVALUATION_SDK = "_azureml.evaluation_sdk_name"
+    NAME_MAP = "_azureml.evaluation_name_map"
+    NAME_MAP_LENGTH = "_azureml.evaluation_name_map_length"
 @experimental
@@ -79,6 +81,13 @@ class _AggregationType(enum.Enum):
     SUM = "sum"
     CUSTOM = "custom"
+class TokenScope(str, enum.Enum):
+    """Defines the scope of the token used to access Azure resources."""
+    DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
+    COGNITIVE_SERVICES_MANAGEMENT = "https://ai.azure.com/.default"
+    AZURE_ML = "https://ml.azure.com/.default"
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
@@ -99,3 +108,10 @@ EVALUATION_PASS_FAIL_MAPPING = {
     True: "pass",
     False: "fail",
 }
+DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS = 60000
+BINARY_AGGREGATE_SUFFIX = "binary_aggregate"
+AOAI_COLUMN_NAME = "aoai"
+DEFAULT_OAI_EVAL_RUN_NAME = "AI_SDK_EVAL_RUN"
+DEFAULT_AOAI_API_VERSION = "2025-04-01-preview" # Unfortunately relying on preview version for now.

azure/ai/evaluation/_converters/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+# ---------------------------------------------------------

azure/ai/evaluation/_converters/_ai_services.py CHANGED Viewed

@@ -667,7 +667,7 @@ class AIAgentConverter:
         return evaluations
     @staticmethod
-    def run_ids_from_conversation(conversation: dict) -> List[str]:
+    def _run_ids_from_conversation(conversation: dict) -> List[str]:
         """
         Extracts a list of unique run IDs from a conversation dictionary.
@@ -684,7 +684,7 @@ class AIAgentConverter:
         return run_ids
     @staticmethod
-    def convert_from_conversation(
+    def _convert_from_conversation(
         conversation: dict, run_id: str, exclude_tool_calls_previous_runs: bool = False
     ) -> dict:
         """
@@ -765,7 +765,7 @@ class AIAgentConverter:
         return json.loads(final_result.to_json())
     @staticmethod
-    def convert_from_file(filename: str, run_id: str) -> dict:
+    def _convert_from_file(filename: str, run_id: str) -> dict:
         """
         Converts the agent run from a JSON file to a format suitable for the OpenAI API, the JSON file being a thread.
@@ -801,4 +801,4 @@ class AIAgentConverter:
         with open(filename, mode="r", encoding="utf-8") as file:
             data = json.load(file)
-        return AIAgentConverter.convert_from_conversation(data, run_id)
+        return AIAgentConverter._convert_from_conversation(data, run_id)

azure/ai/evaluation/_eval_mapping.py ADDED Viewed

@@ -0,0 +1,71 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+# Note: This was removed from the normal constants file due to circular import issues.
+# In the future, it would be nice to instead rely on the id value
+# of each eval class, but I wouldn't like to rely on those before
+# we simplify them into version-less, static values, instead of the
+# problematic registry references they currently are.
+# Import all evals
+from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
+from azure.ai.evaluation import (
+    BleuScoreEvaluator,
+    CodeVulnerabilityEvaluator,
+    CoherenceEvaluator,
+    ContentSafetyEvaluator,
+    F1ScoreEvaluator,
+    FluencyEvaluator,
+    GleuScoreEvaluator,
+    GroundednessEvaluator,
+    GroundednessProEvaluator,
+    HateUnfairnessEvaluator,
+    IndirectAttackEvaluator,
+    IntentResolutionEvaluator,
+    MeteorScoreEvaluator,
+    ProtectedMaterialEvaluator,
+    QAEvaluator,
+    RelevanceEvaluator,
+    ResponseCompletenessEvaluator,
+    RetrievalEvaluator,
+    RougeScoreEvaluator,
+    SelfHarmEvaluator,
+    SexualEvaluator,
+    SimilarityEvaluator,
+    TaskAdherenceEvaluator,
+    ToolCallAccuracyEvaluator,
+    UngroundedAttributesEvaluator,
+    ViolenceEvaluator
+)
+EVAL_CLASS_MAP = {
+    BleuScoreEvaluator: "bleu_score",
+    CodeVulnerabilityEvaluator: "code_vulnerability",
+    CoherenceEvaluator: "coherence",
+    ContentSafetyEvaluator: "content_safety",
+    ECIEvaluator: "eci",
+    F1ScoreEvaluator: "f1_score",
+    FluencyEvaluator: "fluency",
+    GleuScoreEvaluator: "gleu_score",
+    GroundednessEvaluator: "groundedness",
+    GroundednessProEvaluator: "groundedness_pro",
+    HateUnfairnessEvaluator: "hate_unfairness",
+    IndirectAttackEvaluator: "indirect_attack",
+    IntentResolutionEvaluator: "intent_resolution",
+    MeteorScoreEvaluator: "meteor_score",
+    ProtectedMaterialEvaluator: "protected_material",
+    QAEvaluator: "qa",
+    RelevanceEvaluator: "relevance",
+    ResponseCompletenessEvaluator: "response_completeness",
+    RetrievalEvaluator: "retrieval",
+    RougeScoreEvaluator: "rouge_score",
+    SelfHarmEvaluator: "self_harm",
+    SexualEvaluator: "sexual",
+    SimilarityEvaluator: "similarity",
+    TaskAdherenceEvaluator: "task_adherence",
+    ToolCallAccuracyEvaluator: "tool_call_accuracy",
+    UngroundedAttributesEvaluator: "ungrounded_attributes",
+    ViolenceEvaluator: "violence",
+}

azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py CHANGED Viewed

@@ -2,11 +2,12 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+import asyncio
 import logging
 import pandas as pd
 import sys
 from collections import defaultdict
-from concurrent.futures import Future, ThreadPoolExecutor
+from concurrent.futures import Future
 from os import PathLike
 from typing import Any, Callable, Dict, Final, List, Mapping, Optional, Sequence, Union, cast
@@ -14,6 +15,8 @@ from .batch_clients import BatchClientRun, HasAsyncCallable
 from ..._legacy._batch_engine._run_submitter import RunSubmitter
 from ..._legacy._batch_engine._config import BatchEngineConfig
 from ..._legacy._batch_engine._run import Run
+from ..._legacy._adapters._constants import LINE_NUMBER
+from ..._legacy._common._thread_pool_executor_with_context import ThreadPoolExecutorWithContext
 LOGGER = logging.getLogger(__name__)
@@ -22,7 +25,9 @@ LOGGER = logging.getLogger(__name__)
 class RunSubmitterClient:
     def __init__(self, config: Optional[BatchEngineConfig] = None) -> None:
         self._config = config or BatchEngineConfig(LOGGER, use_async=True)
-        self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
+        self._thread_pool = ThreadPoolExecutorWithContext(
+            thread_name_prefix="evaluators_thread",
+            max_workers=self._config.max_concurrency)
     def run(
         self,
@@ -33,30 +38,36 @@ class RunSubmitterClient:
         **kwargs: Any,
     ) -> BatchClientRun:
         if not isinstance(data, pd.DataFrame):
-            # Should never get here
             raise ValueError("Data must be a pandas DataFrame")
-        if not column_mapping:
-            raise ValueError("Column mapping must be provided")
-        # The column mappings are index by data to indicate they come from the data
+        # The column mappings are indexed by data to indicate they come from the data
         # input. Update the inputs so that each entry is a dictionary with a data key
         # that contains the original input data.
         inputs = [{"data": input_data} for input_data in data.to_dict(orient="records")]
-        # always uses async behind the scenes
+        # Pass the correct previous run to the evaluator
+        run: Optional[BatchClientRun] = kwargs.pop("run", None)
+        if run:
+            kwargs["run"] = self._get_run(run)
+        # Try to get async function to use
         if isinstance(flow, HasAsyncCallable):
             flow = flow._to_async()  # pylint: disable=protected-access
-        run_submitter = RunSubmitter(self._config)
+        # Start an event loop for async execution on a thread pool thread to separate it
+        # from the caller's thread.
+        run_submitter = RunSubmitter(self._config, self._thread_pool)
         run_future = self._thread_pool.submit(
-            run_submitter.submit,
-            dynamic_callable=flow,
-            inputs=inputs,
-            column_mapping=column_mapping,
-            name_prefix=evaluator_name,
-            created_on=kwargs.pop("created_on", None),
-            storage_creator=kwargs.pop("storage_creator", None),
-            **kwargs,
+            asyncio.run,
+            run_submitter.submit(
+                dynamic_callable=flow,
+                inputs=inputs,
+                column_mapping=column_mapping,
+                name_prefix=evaluator_name,
+                created_on=kwargs.pop("created_on", None),
+                storage_creator=kwargs.pop("storage_creator", None),
+                **kwargs,
+            )
         )
         return run_future
@@ -75,7 +86,10 @@ class RunSubmitterClient:
                     key = f"{prefix}.{k}"
                     data[key].append(value)
+        # Go from a list of dictionaries (i.e. a row view of the data) to a dictionary of lists
+        # (i.e. a column view of the data)
         _update("inputs", run.inputs)
+        _update("inputs", [{ LINE_NUMBER: i } for i in range(len(run.inputs)) ])
         _update("outputs", run.outputs)
         df = pd.DataFrame(data).reindex(columns=[k for k in data.keys()])

azure/ai/evaluation/_evaluate/_batch_run/code_client.py CHANGED Viewed

@@ -6,17 +6,17 @@ import json
 import logging
 import os
 from concurrent.futures import Future
-from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Union, cast
+from typing import Any, Callable, Dict, Optional, Sequence, Union, cast
 import pandas as pd
-from promptflow.contracts.types import AttrDict
-from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
+from azure.ai.evaluation._legacy._adapters.types import AttrDict
+from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
 from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
+from .batch_clients import BatchClientRun
 LOGGER = logging.getLogger(__name__)
@@ -84,7 +84,7 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             for param in inspect.signature(evaluator).parameters.values()
             if param.name not in ["args", "kwargs"]
         }
-        for value in input_df.to_dict("records"):
+        for value in cast(Sequence[Dict[str, Any]], input_df.to_dict("records")):
             # Filter out only the parameters that are present in the input data
             # if no parameters then pass data as is
             filtered_values = {k: v for k, v in value.items() if k in parameters} if len(parameters) > 0 else value
@@ -133,10 +133,10 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
     def run(
         self,  # pylint: disable=unused-argument
         flow: Callable,
-        data: Union[os.PathLike, Path, pd.DataFrame],
-        evaluator_name: Optional[str] = None,
+        data: Union[str, os.PathLike, pd.DataFrame],
         column_mapping: Optional[Dict[str, str]] = None,
-        **kwargs,
+        evaluator_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> CodeRun:
         input_df = data
         if not isinstance(input_df, pd.DataFrame):
@@ -157,7 +157,7 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             evaluator=flow,
             input_df=input_df,
             column_mapping=column_mapping,
-            evaluator_name=evaluator_name,
+            evaluator_name=evaluator_name or "",
         )
         return CodeRun(
@@ -169,11 +169,13 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             ),
         )
-    def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
+    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
+        run = self._get_result(client_run)
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df
-    def get_metrics(self, run: CodeRun) -> Dict[str, Any]:
+    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run = self._get_result(client_run)
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
@@ -183,6 +185,10 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             return {}
         return aggregated_metrics
-    def get_run_summary(self, run: CodeRun) -> Any:  # pylint: disable=unused-argument
+    def get_run_summary(self, client_run: BatchClientRun) -> Any:  # pylint: disable=unused-argument
         # Not implemented
         return None
+    @staticmethod
+    def _get_result(run: BatchClientRun) -> CodeRun:
+        return cast(CodeRun, run)

azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py CHANGED Viewed

@@ -5,9 +5,13 @@ import os
 import types
 from typing import Optional, Type, Union
-from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
-from promptflow._utils.user_agent_utils import ClientUserAgentUtil
-from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
+from azure.ai.evaluation._legacy._adapters.utils import ClientUserAgentUtil
+from azure.ai.evaluation._legacy._adapters.tracing import inject_openai_api, recover_openai_api
+from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
+    inject_openai_api as ported_inject_openai_api,
+    recover_openai_api as ported_recover_openai_api,
+)
 from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
@@ -19,6 +23,8 @@ from azure.ai.evaluation._constants import (
 from ..._user_agent import USER_AGENT
 from .._utils import set_event_loop_policy
+from .batch_clients import BatchClient
+from ._run_submitter_client import RunSubmitterClient
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
@@ -33,7 +39,7 @@ class EvalRunContext:
     ]
     """
-    def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
+    def __init__(self, client: BatchClient) -> None:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
@@ -64,6 +70,10 @@ class EvalRunContext:
             # For addressing the issue of asyncio event loop closed on Windows
             set_event_loop_policy()
+        if isinstance(self.client, RunSubmitterClient):
+            set_event_loop_policy()
+            ported_inject_openai_api()
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
@@ -87,3 +97,6 @@ class EvalRunContext:
             if self._is_otel_timeout_set_by_system:
                 os.environ.pop(OTEL_EXPORTER_OTLP_TRACES_TIMEOUT, None)
                 self._is_otel_timeout_set_by_system = False
+        if isinstance(self.client, RunSubmitterClient):
+            ported_recover_openai_api()

azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py CHANGED Viewed

@@ -8,15 +8,21 @@ import inspect
 import logging
 import math
 import os
+from datetime import datetime
 from collections import OrderedDict
 from concurrent.futures import Future
-from typing import Any, Callable, Dict, Optional, Union
+from typing import Any, Callable, Dict, Optional, Union, cast
+from azure.ai.evaluation._legacy._adapters.entities import Run
+from azure.ai.evaluation._legacy._adapters._configuration import Configuration
+from azure.ai.evaluation._legacy._adapters.client import PFClient
+from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext
 import pandas as pd
-from promptflow.client import PFClient
-from promptflow.entities import Run
-from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
+from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClientRun, HasAsyncCallable
+Configuration.get_instance().set_config("trace.destination", "none")
 LOGGER = logging.getLogger(__name__)
@@ -26,46 +32,61 @@ class ProxyRun:
 class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
-    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
-        self, pf_client: PFClient
+    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential
+        self,
+        **kwargs: Any,
     ) -> None:
-        self._pf_client = pf_client
-        self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
+        self._pf_client = PFClient(**kwargs)
+        self._thread_pool = ThreadPoolExecutorWithContext(thread_name_prefix="evaluators_thread")
     def run(
         self,
-        flow: Union[str, os.PathLike, Callable],
-        data: Union[str, os.PathLike],
+        flow: Callable,
+        data: Union[str, os.PathLike, pd.DataFrame],
         column_mapping: Optional[Dict[str, str]] = None,
-        **kwargs
+        evaluator_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> ProxyRun:
-        flow_to_run = flow
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
+        if isinstance(data, pd.DataFrame):
+            raise ValueError("Data cannot be a pandas DataFrame")
+        flow_to_run: Callable = flow
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and isinstance(flow, HasAsyncCallable):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access
+        name: str = kwargs.pop("name", "")
+        if not name:
+            name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
+        # Pass the correct previous run to the evaluator
+        run: Optional[BatchClientRun] = kwargs.pop("run", None)
+        if run:
+            kwargs["run"] = self.get_result(run)
         batch_use_async = self._should_batch_use_async(flow_to_run)
         eval_future = self._thread_pool.submit(
             self._pf_client.run,
             flow_to_run,
             data=data,
-            column_mapping=column_mapping,
+            column_mapping=column_mapping,  # type: ignore
             batch_use_async=batch_use_async,
-            **kwargs
+            name=name,
+            **kwargs,
         )
         return ProxyRun(run=eval_future)
-    def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
-        run: Run = proxy_run.run.result()
+    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
+        run: Run = self.get_result(client_run)
         result_df = self._pf_client.get_details(run, all_results=all_results)
         result_df.replace("(Failed)", math.nan, inplace=True)
         return result_df
-    def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run: Run = proxy_run.run.result()
+    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run: Run = self.get_result(client_run)
         return self._pf_client.get_metrics(run)
-    def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run = proxy_run.run.result()
+    def get_run_summary(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run: Run = self.get_result(client_run)
         # pylint: disable=protected-access
         completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
@@ -81,13 +102,17 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         return OrderedDict(
             [
                 ("status", status),
-                ("duration", str(run._end_time - run._created_on)),
+                ("duration", str((run._end_time or run._created_on) - run._created_on)),
                 ("completed_lines", completed_lines),
                 ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )
+    @staticmethod
+    def get_result(run: BatchClientRun) -> Run:
+        return cast(ProxyRun, run).run.result()
     @staticmethod
     def _should_batch_use_async(flow):
         if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":

azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py CHANGED Viewed

@@ -5,8 +5,15 @@ import os
 import types
 from typing import Optional, Type
-from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClient
+from azure.ai.evaluation._evaluate._batch_run import RunSubmitterClient
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
+    inject_openai_api as ported_inject_openai_api,
+    recover_openai_api as ported_recover_openai_api,
+)
 from azure.ai.evaluation._constants import PF_DISABLE_TRACING
+from azure.ai.evaluation._evaluate._utils import set_event_loop_policy
 class TargetRunContext:
@@ -16,7 +23,8 @@ class TargetRunContext:
     :type upload_snapshot: bool
     """
-    def __init__(self, upload_snapshot: bool = False) -> None:
+    def __init__(self, client: BatchClient, upload_snapshot: bool = False) -> None:
+        self._client = client
         self._upload_snapshot = upload_snapshot
         self._original_cwd = os.getcwd()
@@ -32,6 +40,11 @@ class TargetRunContext:
         os.environ[PF_DISABLE_TRACING] = "true"
+        if isinstance(self._client, RunSubmitterClient):
+            ported_inject_openai_api()
+            # For addressing the issue of asyncio event loop closed on Windows
+            set_event_loop_policy()
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
@@ -44,3 +57,6 @@ class TargetRunContext:
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
         os.environ.pop(PF_DISABLE_TRACING, None)
+        if isinstance(self._client, RunSubmitterClient):
+            ported_recover_openai_api()

azure/ai/evaluation/_evaluate/_eval_run.py CHANGED Viewed

@@ -13,7 +13,7 @@ import uuid
 from typing import Any, Dict, List, Optional, Set, Type
 from urllib.parse import urlparse
-from promptflow._sdk.entities import Run
+from azure.ai.evaluation._legacy._adapters.entities import Run
 from typing_extensions import Self
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
@@ -295,7 +295,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"
     def _get_token(self) -> str:
-        return self._management_client.get_token()
+        return self._management_client.get_token().token
     def request_with_retry(
         self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl