azure-ai-evaluation 1.0.0b5__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +188 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +118 -0
- azure/ai/evaluation/_common/_experimental.py +4 -0
- azure/ai/evaluation/_common/math.py +62 -2
- azure/ai/evaluation/_common/rai_service.py +110 -50
- azure/ai/evaluation/_common/utils.py +50 -16
- azure/ai/evaluation/_constants.py +2 -0
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +13 -3
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +12 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -43
- azure/ai/evaluation/_evaluate/_evaluate.py +62 -131
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +2 -1
- azure/ai/evaluation/_evaluate/_utils.py +72 -38
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +16 -17
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +60 -29
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +88 -6
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +16 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +39 -10
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +58 -52
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +79 -34
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +73 -34
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +74 -33
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -34
- azure/ai/evaluation/_evaluators/_eci/_eci.py +28 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +57 -26
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +13 -15
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +68 -30
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +17 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +10 -8
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -2
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +10 -6
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +6 -2
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +57 -34
- azure/ai/evaluation/_evaluators/_qa/_qa.py +25 -37
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +63 -29
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +76 -161
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +24 -25
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +65 -67
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +26 -20
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +74 -40
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_http_utils.py +6 -4
- azure/ai/evaluation/_model_configurations.py +65 -14
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +17 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +57 -47
- azure/ai/evaluation/simulator/_constants.py +11 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +128 -7
- azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +16 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +12 -1
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +3 -1
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +48 -4
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -0
- azure/ai/evaluation/simulator/_simulator.py +54 -45
- azure/ai/evaluation/simulator/_utils.py +25 -7
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/METADATA +240 -327
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/RECORD +71 -68
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_common/rai_service.py

@@ -6,6 +6,8 @@ import importlib.metadata
 import math
 import re
 import time
+import json
+import html
 from ast import literal_eval
 from typing import Dict, List, Optional, Union, cast
 from urllib.parse import urlparse

@@ -38,22 +40,53 @@ USER_AGENT = "{}/{}".format("azure-ai-evaluation", version)

 USER_TEXT_TEMPLATE_DICT: Dict[str, Template] = {
     "DEFAULT": Template("<Human>{$query}</><System>{$response}</>"),
-    Tasks.GROUNDEDNESS: Template('{"question": "$query", "answer": "$response", "context": "$context"}'),
 }


-def
+def get_formatted_template(data: dict, annotation_task: str) -> str:
+    """Given the task and input data, produce a formatted string that will serve as the main
+    payload for the RAI service. Requires specific per-task logic.
+
+    :param data: The data to incorporate into the payload.
+    :type data: dict
+    :param annotation_task: The annotation task to use. This determines the template to use.
+    :type annotation_task: str
+    :return: The formatted based on the data and task template.
+    :rtype: str
+    """
+    # Template class doesn't play nice with json dumping/loading, just handle groundedness'
+    # JSON format manually.
+    # Template was: Template('{"question": "$query", "answer": "$response", "context": "$context"}'),
+    if annotation_task == Tasks.GROUNDEDNESS:
+        as_dict = {
+            "question": data.get("query", ""),
+            "answer": data.get("response", ""),
+            "context": data.get("context", ""),
+        }
+        return json.dumps(as_dict)
+    as_dict = {
+        "query": html.escape(data.get("query", "")),
+        "response": html.escape(data.get("response", "")),
+    }
+    user_text = USER_TEXT_TEMPLATE_DICT.get(annotation_task, USER_TEXT_TEMPLATE_DICT["DEFAULT"]).substitute(**as_dict)
+    return user_text.replace("'", '\\"')
+
+
+def get_common_headers(token: str, evaluator_name: Optional[str] = None) -> Dict:
     """Get common headers for the HTTP request

     :param token: The Azure authentication token.
     :type token: str
+    :param evaluator_name: The evaluator name. Default is None.
+    :type evaluator_name: str
     :return: The common headers.
     :rtype: Dict
     """
+    user_agent = f"{USER_AGENT} (type=evaluator; subtype={evaluator_name})" if evaluator_name else USER_AGENT
     return {
         "Authorization": f"Bearer {token}",
         "Content-Type": "application/json",
-        "User-Agent":
+        "User-Agent": user_agent,
         # Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
         # https://github.com/encode/httpx/discussions/2959
         "Connection": "close",
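To make the payload change concrete, here is a small standalone sketch that mirrors the new `get_formatted_template` logic; the `GROUNDEDNESS` constant and template dictionary below are stand-ins for the ones defined in `rai_service.py`, not the SDK's own objects:

```python
import html
import json
from string import Template

# Stand-ins for the Tasks constant and template dict defined in rai_service.py.
GROUNDEDNESS = "groundedness"
DEFAULT_TEMPLATE = Template("<Human>{$query}</><System>{$response}</>")


def format_payload(data: dict, annotation_task: str) -> str:
    """Mirror of get_formatted_template: groundedness payloads are JSON-encoded,
    everything else is HTML-escaped into the default template."""
    if annotation_task == GROUNDEDNESS:
        return json.dumps(
            {
                "question": data.get("query", ""),
                "answer": data.get("response", ""),
                "context": data.get("context", ""),
            }
        )
    escaped = {
        "query": html.escape(data.get("query", "")),
        "response": html.escape(data.get("response", "")),
    }
    return DEFAULT_TEMPLATE.substitute(**escaped).replace("'", '\\"')


print(format_payload({"query": "Hi", "response": "Hello"}, "violence"))
# <Human>{Hi}</><System>{Hello}</>
print(format_payload({"query": "Q", "response": "A", "context": "C"}, GROUNDEDNESS))
# {"question": "Q", "answer": "A", "context": "C"}
```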
@@ -83,27 +116,31 @@ async def ensure_service_availability(rai_svc_url: str, token: str, capability:
     async with get_async_http_client() as client:
         response = await client.get(svc_liveness_url, headers=headers)

-
-
-
-
-
-
-
-
-
-
-
+        if response.status_code != 200:
+            msg = (
+                f"RAI service is unavailable in this region, or you lack the necessary permissions "
+                f"to access the AI project. Status Code: {response.status_code}"
+            )
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.RAI_CLIENT,
+                category=ErrorCategory.SERVICE_UNAVAILABLE,
+                blame=ErrorBlame.USER_ERROR,
+                tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
+            )

-
-
-
-
-
-
-
-
-
+        capabilities = response.json()
+        if capability and capability not in capabilities:
+            msg = f"The needed capability '{capability}' is not supported by the RAI service in this region."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.RAI_CLIENT,
+                category=ErrorCategory.SERVICE_UNAVAILABLE,
+                blame=ErrorBlame.USER_ERROR,
+                tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
+            )


 def generate_payload(normalized_user_text: str, metric: str, annotation_task: str) -> Dict:

@@ -141,7 +178,9 @@ def generate_payload(normalized_user_text: str, metric: str, annotation_task: str
     )


-async def submit_request(
+async def submit_request(
+    data: dict, metric: str, rai_svc_url: str, token: str, annotation_task: str, evaluator_name: str
+) -> str:
     """Submit request to Responsible AI service for evaluation and return operation ID

     :param data: The data to evaluate.

@@ -154,15 +193,16 @@ async def submit_request(data: dict, metric: str, rai_svc_url: str, token: str,
     :type token: str
     :param annotation_task: The annotation task to use.
     :type annotation_task: str
+    :param evaluator_name: The evaluator name.
+    :type evaluator_name: str
     :return: The operation ID.
     :rtype: str
     """
-
-    normalized_user_text = user_text.replace("'", '\\"')
+    normalized_user_text = get_formatted_template(data, annotation_task)
     payload = generate_payload(normalized_user_text, metric, annotation_task=annotation_task)

     url = rai_svc_url + "/submitannotation"
-    headers = get_common_headers(token)
+    headers = get_common_headers(token, evaluator_name)

     async with get_async_http_client_with_timeout() as client:
         http_response = await client.post(url, json=payload, headers=headers)
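The evaluator name threaded through `submit_request` ends up in the request headers; this is roughly the User-Agent string that `get_common_headers` now builds (the base `USER_AGENT` value below is a stand-in for the real module constant):

```python
from typing import Optional

USER_AGENT = "azure-ai-evaluation/1.1.0"  # stand-in for the module-level constant


def tagged_user_agent(evaluator_name: Optional[str] = None) -> str:
    # Same expression used in get_common_headers above.
    return f"{USER_AGENT} (type=evaluator; subtype={evaluator_name})" if evaluator_name else USER_AGENT


print(tagged_user_agent("ViolenceEvaluator"))
# azure-ai-evaluation/1.1.0 (type=evaluator; subtype=ViolenceEvaluator)
print(tagged_user_agent())
# azure-ai-evaluation/1.1.0
```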
@@ -235,13 +275,27 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
         _InternalEvaluationMetrics.ECI,
         EvaluationMetrics.XPIA,
     }:
-
+        result = {}
+        if not batch_response or len(batch_response[0]) == 0:
+            return {}
+        if metric_name == EvaluationMetrics.PROTECTED_MATERIAL and metric_name not in batch_response[0]:
+            pm_metric_names = {"artwork", "fictional_characters", "logos_and_brands"}
+            for pm_metric_name in pm_metric_names:
+                response = batch_response[0][pm_metric_name]
+                response = response.replace("false", "False")
+                response = response.replace("true", "True")
+                parsed_response = literal_eval(response)
+                result[pm_metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else math.nan
+                result[pm_metric_name + "_reason"] = (
+                    parsed_response["reasoning"] if "reasoning" in parsed_response else ""
+                )
+            return result
+        if metric_name not in batch_response[0]:
             return {}
         response = batch_response[0][metric_name]
         response = response.replace("false", "False")
         response = response.replace("true", "True")
         parsed_response = literal_eval(response)
-        result = {}
         # Use label instead of score since these are assumed to be boolean results.
         # Use math.nan as null value since it's ignored by aggregations rather than treated as 0.
         result[metric_display_name + "_label"] = parsed_response["label"] if "label" in parsed_response else math.nan
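The new protected-material branch in `parse_response` fans one service reply out into per-category label/reason pairs. A minimal sketch of that parsing with a fabricated `batch_response` (the payload shape shown here is an assumption based on the code above, not a captured service response):

```python
import math
from ast import literal_eval

# Fabricated example of what batch_response[0] might contain for protected material.
batch_response = [
    {
        "artwork": "{'label': false, 'reasoning': 'No artwork detected.'}",
        "fictional_characters": "{'label': true, 'reasoning': 'Named character found.'}",
        "logos_and_brands": "{'label': false, 'reasoning': ''}",
    }
]

result = {}
for name in ("artwork", "fictional_characters", "logos_and_brands"):
    raw = batch_response[0][name].replace("false", "False").replace("true", "True")
    parsed = literal_eval(raw)
    result[name + "_label"] = parsed.get("label", math.nan)
    result[name + "_reason"] = parsed.get("reasoning", "")

print(result["fictional_characters_label"])  # True
print(result["artwork_reason"])              # No artwork detected.
```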
@@ -371,13 +425,17 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: str
     )

     if response.status_code != 200:
-        msg =
+        msg = (
+            f"Failed to connect to your Azure AI project. Please check if the project scope is configured correctly, "
+            f"and make sure you have the necessary access permissions. "
+            f"Status code: {response.status_code}."
+        )
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
             target=ErrorTarget.RAI_CLIENT,
-
-
+            blame=ErrorBlame.USER_ERROR,
+            category=ErrorCategory.PROJECT_ACCESS_ERROR,
+            tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
         )

     base_url = urlparse(response.json()["properties"]["discoveryUrl"])

@@ -442,24 +500,26 @@ async def evaluate_with_rai_service(
     credential: TokenCredential,
     annotation_task: str = Tasks.CONTENT_HARM,
     metric_display_name=None,
+    evaluator_name=None,
 ) -> Dict[str, Union[str, float]]:
-    """
+    """Evaluate the content safety of the response using Responsible AI service

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    :param data: The data to evaluate.
+    :type data: dict
+    :param metric_name: The evaluation metric to use.
+    :type metric_name: str
+    :param project_scope: The Azure AI project scope details.
+    :type project_scope: Dict
+    :param credential: The Azure authentication credential.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :param annotation_task: The annotation task to use.
+    :type annotation_task: str
+    :param metric_display_name: The display name of metric to use.
+    :type metric_display_name: str
+    :param evaluator_name: The evaluator name to use.
+    :type evaluator_name: str
+    :return: The parsed annotation result.
+    :rtype: Dict[str, Union[str, float]]
     """

     # Get RAI service URL from discovery service and check service availability

@@ -468,7 +528,7 @@ async def evaluate_with_rai_service(
     await ensure_service_availability(rai_svc_url, token, annotation_task)

     # Submit annotation request and fetch result
-    operation_id = await submit_request(data, metric_name, rai_svc_url, token, annotation_task)
+    operation_id = await submit_request(data, metric_name, rai_svc_url, token, annotation_task, evaluator_name)
     annotation_response = cast(List[Dict], await fetch_result(operation_id, rai_svc_url, credential, token))
     result = parse_response(annotation_response, metric_name, metric_display_name)

azure/ai/evaluation/_common/utils.py

@@ -293,14 +293,22 @@ def parse_quality_evaluator_reason_score(llm_output: str) -> Tuple[float, str]:
     score = math.nan
     reason = ""
     if llm_output:
-
-
-
-
-
-
-
-
+        try:
+            score_pattern = r"<S2>\D*?([1-5]).*?</S2>"
+            reason_pattern = r"<S1>(.*?)</S1>"
+            score_match = re.findall(score_pattern, llm_output, re.DOTALL)
+            reason_match = re.findall(reason_pattern, llm_output, re.DOTALL)
+            if score_match:
+                score = float(score_match[0].strip())
+            if reason_match:
+                reason = reason_match[0].strip()
+        except ValueError as exc:
+            raise EvaluationException(
+                message=f"Failed to parse model output: \n{llm_output}",
+                internal_message="Failed to parse model output.",
+                category=ErrorCategory.FAILED_EXECUTION,
+                blame=ErrorBlame.SYSTEM_ERROR,
+            ) from exc

     return score, reason

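The restored parsing in `parse_quality_evaluator_reason_score` pulls the reasoning and score out of `<S1>`/`<S2>` tags; a self-contained example of those same regexes on a made-up model output:

```python
import re

# Fabricated model output in the <S1>/<S2> format the quality evaluators expect.
llm_output = "<S0>...</S0><S1>The response answers the question directly.</S1><S2>The score is 4</S2>"

score_pattern = r"<S2>\D*?([1-5]).*?</S2>"
reason_pattern = r"<S1>(.*?)</S1>"

score_match = re.findall(score_pattern, llm_output, re.DOTALL)
reason_match = re.findall(reason_pattern, llm_output, re.DOTALL)

score = float(score_match[0].strip()) if score_match else float("nan")
reason = reason_match[0].strip() if reason_match else ""

print(score)   # 4.0
print(reason)  # The response answers the question directly.
```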
@@ -329,12 +337,12 @@ def retrieve_content_type(assistant_messages: List, metric: str) -> str:
         return "image"

     # Iterate through each message
-    for
+    for message in assistant_messages:
         # Ensure "content" exists in the message and is iterable
-
-
-
-
+        if isinstance(message.get("content", []), list):
+            for content in message.get("content", []):
+                if content.get("type") == "image_url":
+                    return "image"
     # Default return if no image was found
     return "text"

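The restored loop in `retrieve_content_type` boils down to checking assistant messages for an `image_url` content part; a tiny standalone check over a sample message list (the message shape is illustrative):

```python
assistant_messages = [
    {
        "role": "assistant",
        "content": [
            {"type": "text", "text": "Here is the picture you asked for."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }
]


def content_type(messages) -> str:
    # Same shape as the restored loop: any image_url part means "image".
    for message in messages:
        if isinstance(message.get("content", []), list):
            for part in message.get("content", []):
                if part.get("type") == "image_url":
                    return "image"
    return "text"


print(content_type(assistant_messages))  # image
```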
@@ -362,6 +370,8 @@ def validate_conversation(conversation):
     )
     expected_roles = {"user", "assistant", "system"}
     image_found = False
+    assistant_message_count = 0
+    user_message_count = 0
     for num, message in enumerate(messages, 1):
         if not isinstance(message, dict):
             try:

@@ -374,17 +384,21 @@ def validate_conversation(conversation):
                 )
             except ImportError as ex:
                 raise MissingRequiredPackage(
-                    message="Please install 'azure-ai-inference' package to use SystemMessage,
+                    message="Please install 'azure-ai-inference' package to use SystemMessage, "
+                    "UserMessage or AssistantMessage."
                 ) from ex

-            if isinstance(
+            if isinstance(message, ChatRequestMessage) and not isinstance(
                 message, (UserMessage, AssistantMessage, SystemMessage)
             ):
                 raise_exception(
                     f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
                     ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
                 )
-
+            if isinstance(message, AssistantMessage):
+                assistant_message_count += 1
+            if isinstance(message, UserMessage):
+                user_message_count += 1
             if isinstance(message.content, list) and any(
                 isinstance(item, ImageContentItem) for item in message.content
             ):

@@ -395,6 +409,10 @@ def validate_conversation(conversation):
                 f"Invalid role provided: {message.get('role')}. Message number: {num}",
                 ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
             )
+        if message.get("role") == "assistant":
+            assistant_message_count += 1
+        if message.get("role") == "user":
+            user_message_count += 1
         content = message.get("content")
         if not isinstance(content, (str, list)):
             raise_exception(

@@ -409,3 +427,19 @@ def validate_conversation(conversation):
             "Message needs to have multi-modal input like images.",
             ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
         )
+    if assistant_message_count == 0:
+        raise_exception(
+            "Assistant role required in one of the messages.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    if user_message_count == 0:
+        raise_exception(
+            "User role required in one of the messages.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    if assistant_message_count > 1:
+        raise_exception(
+            "Evaluators for multimodal conversations only support single turn. "
+            "User and assistant role expected as the only role in each message.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
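The new counters enforce that a multimodal conversation contains at least one user message, at least one assistant message, and no more than a single assistant turn. A compact sketch of just that rule on plain dict messages (using `ValueError` in place of the SDK's `raise_exception` helper):

```python
def check_single_turn(messages: list) -> None:
    # Mirrors the counting rules added to validate_conversation (dict-message path only).
    assistant_count = sum(1 for m in messages if m.get("role") == "assistant")
    user_count = sum(1 for m in messages if m.get("role") == "user")
    if assistant_count == 0:
        raise ValueError("Assistant role required in one of the messages.")
    if user_count == 0:
        raise ValueError("User role required in one of the messages.")
    if assistant_count > 1:
        raise ValueError("Evaluators for multimodal conversations only support single turn.")


check_single_turn(
    [
        {"role": "user", "content": "Describe this image."},
        {"role": "assistant", "content": "It shows a red bicycle."},
    ]
)  # passes silently; a second assistant turn would raise
```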
azure/ai/evaluation/_constants.py

@@ -54,6 +54,7 @@ class EvaluationRunProperties:

     RUN_TYPE = "runType"
     EVALUATION_RUN = "_azureml.evaluation_run"
+    EVALUATION_SDK = "_azureml.evaluation_sdk_name"


 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

@@ -62,6 +63,7 @@ CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4

 PF_BATCH_TIMEOUT_SEC_DEFAULT = 3600
 PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC"
+PF_DISABLE_TRACING = "PF_DISABLE_TRACING"

 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT = "OTEL_EXPORTER_OTLP_TRACES_TIMEOUT"
 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT = 60
azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py

@@ -14,6 +14,7 @@ from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT,
     PF_BATCH_TIMEOUT_SEC,
     PF_BATCH_TIMEOUT_SEC_DEFAULT,
+    PF_DISABLE_TRACING,
 )

 from ..._user_agent import USER_AGENT

@@ -36,8 +37,12 @@ class EvalRunContext:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
+        self._original_cwd = os.getcwd()

     def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
+        self._original_cwd = os.getcwd()
+
         if isinstance(self.client, CodeClient):
             ClientUserAgentUtil.append_user_agent(USER_AGENT)
             inject_openai_api()

@@ -45,6 +50,7 @@ class EvalRunContext:
         if isinstance(self.client, ProxyClient):
             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
             os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
+            os.environ[PF_DISABLE_TRACING] = "true"

             if os.environ.get(PF_BATCH_TIMEOUT_SEC) is None:
                 os.environ[PF_BATCH_TIMEOUT_SEC] = str(PF_BATCH_TIMEOUT_SEC_DEFAULT)

@@ -64,12 +70,15 @@ class EvalRunContext:
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
     ) -> None:
+        os.chdir(self._original_cwd)
+
         if isinstance(self.client, CodeClient):
             recover_openai_api()

         if isinstance(self.client, ProxyClient):
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
             os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
+            os.environ.pop(PF_DISABLE_TRACING, None)

         if self._is_batch_timeout_set_by_system:
             os.environ.pop(PF_BATCH_TIMEOUT_SEC, None)
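The context-manager changes above follow a common pattern: remember the working directory on entry, set the promptflow environment switches, then restore everything on exit. A generic sketch of that pattern (the class and flag handling here are illustrative, not the SDK's own types):

```python
import os
import types
from typing import Optional, Type


class PreserveCwdAndEnv:
    """Illustrative context manager: save cwd, set an env flag, undo both on exit."""

    def __init__(self, env_name: str = "PF_DISABLE_TRACING") -> None:
        self._env_name = env_name
        self._original_cwd = os.getcwd()

    def __enter__(self) -> None:
        # Snapshot cwd in case the batch engine changes it and never changes it back.
        self._original_cwd = os.getcwd()
        os.environ[self._env_name] = "true"

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[types.TracebackType],
    ) -> None:
        os.chdir(self._original_cwd)
        os.environ.pop(self._env_name, None)


with PreserveCwdAndEnv():
    pass  # batch run happens here; cwd and the env flag are restored afterwards
```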
azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py

@@ -68,12 +68,22 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         run = proxy_run.run.result()

         # pylint: disable=protected-access
+        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
+        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
+
+        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
+        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
+            status = "Completed with Errors"
+        else:
+            status = run.status
+
+        # Return the ordered dictionary with the updated status
         return OrderedDict(
             [
-                ("status",
+                ("status", status),
                 ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines",
-                ("failed_lines",
+                ("completed_lines", completed_lines),
+                ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )
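The status rewrite in `ProxyClient` reads promptflow's line counters and downgrades a "Completed" run to "Completed with Errors" when any line failed; a minimal sketch of that decision on a fabricated `system_metrics` dict:

```python
from collections import OrderedDict

# Fabricated example of the run properties promptflow exposes.
properties = {"system_metrics": {"__pf__.lines.completed": "9", "__pf__.lines.failed": "1"}}
run_status = "Completed"

completed_lines = properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
failed_lines = properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")

if run_status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
    status = "Completed with Errors"
else:
    status = run_status

summary = OrderedDict(
    [("status", status), ("completed_lines", completed_lines), ("failed_lines", failed_lines)]
)
print(summary["status"])  # Completed with Errors
```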
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py

@@ -6,6 +6,7 @@ import types
 from typing import Optional, Type

 from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._constants import PF_DISABLE_TRACING


 class TargetRunContext:

@@ -15,21 +16,31 @@ class TargetRunContext:
     :type upload_snapshot: bool
     """

-    def __init__(self, upload_snapshot: bool) -> None:
+    def __init__(self, upload_snapshot: bool = False) -> None:
         self._upload_snapshot = upload_snapshot
+        self._original_cwd = os.getcwd()

     def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
+        self._original_cwd = os.getcwd()
+
         # Address "[WinError 32] The process cannot access the file" error,
         # caused by conflicts when the venv and target function are in the same directory.
         # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
         if not self._upload_snapshot:
             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"

+        os.environ[PF_DISABLE_TRACING] = "true"
+
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
     ) -> None:
+        os.chdir(self._original_cwd)
+
         if not self._upload_snapshot:
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
+
+        os.environ.pop(PF_DISABLE_TRACING, None)
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -22,28 +22,12 @@ from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
 from azure.core.exceptions import HttpResponseError
+from azure.storage.blob import BlobServiceClient
+from azure.ai.evaluation._azure._clients import LiteMLClient

 LOGGER = logging.getLogger(__name__)


-# Handle optional import. The azure libraries are only present if
-# promptflow-azure is installed.
-try:
-    from azure.ai.ml import MLClient
-    from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
-    from azure.ai.ml.entities._datastore.datastore import Datastore
-    from azure.storage.blob import BlobServiceClient
-except (ModuleNotFoundError, ImportError):
-    # If the above mentioned modules cannot be imported, we are running
-    # in local mode and MLClient in the constructor will be None, so
-    # we will not arrive to Azure-dependent code.
-
-    # We are logging the import failure only if debug logging level is set because:
-    # - If the project configuration was not provided this import is not needed.
-    # - If the project configuration was provided, the error will be raised by PFClient.
-    LOGGER.debug("promptflow.azure is not installed.")
-
-
 @dataclasses.dataclass
 class RunInfo:
     """

@@ -92,18 +76,18 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
     :type group_name: str
     :param workspace_name: The name of workspace/project used to track run.
     :type workspace_name: str
-    :param
-    :type
+    :param management_client: The trace destination string to parse the AI ML workspace blob store from.
+    :type management_client:
+        ~azure.ai.evaluation._promptflow.azure._lite_azure_management_client.LiteMLClient
     :param promptflow_run: The promptflow run used by the
+    :type promptflow_run: Optional[promptflow._sdk.entities.Run]
     """

     _MAX_RETRIES = 5
     _BACKOFF_FACTOR = 2
     _TIMEOUT = 5
-    _SCOPE = "https://management.azure.com/.default"

     EVALUATION_ARTIFACT = "instance_results.jsonl"
-    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"

     def __init__(
         self,

@@ -112,14 +96,14 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         subscription_id: str,
         group_name: str,
         workspace_name: str,
-
+        management_client: LiteMLClient,
         promptflow_run: Optional[Run] = None,
     ) -> None:
         self._tracking_uri: str = tracking_uri
         self._subscription_id: str = subscription_id
         self._resource_group_name: str = group_name
         self._workspace_name: str = workspace_name
-        self.
+        self._management_client: LiteMLClient = management_client
         self._is_promptflow_run: bool = promptflow_run is not None
         self._run_name = run_name
         self._promptflow_run = promptflow_run

@@ -184,7 +168,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         if self._promptflow_run is not None:
             self._info = RunInfo(
                 self._promptflow_run.name,
-                self._promptflow_run._experiment_name,  # pylint: disable=protected-access
+                self._promptflow_run._experiment_name or "",  # pylint: disable=protected-access
                 self._promptflow_run.name,
             )
         else:

@@ -310,12 +294,8 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         """
         return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"

-    def _get_token(self):
-
-        # is an optional dependency.
-        from promptflow.azure._utils._token_cache import ArmTokenCache  # pylint: disable=import-error,no-name-in-module
-
-        return ArmTokenCache().get_token(self._ml_client._credential)  # pylint: disable=protected-access
+    def _get_token(self) -> str:
+        return self._management_client.get_token()

     def request_with_retry(
         self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None

@@ -413,7 +393,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         """
         if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
-        # Check if artifact
+        # Check if artifact directory is empty or does not exist.
         if not os.path.isdir(artifact_folder):
             LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
             return

@@ -441,9 +421,10 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
                 local_paths.append(local_file_path)

         # We will write the artifacts to the workspaceblobstore
-        datastore = self.
+        datastore = self._management_client.workspace_get_default_datastore(self._workspace_name, True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
-
+
+        svc_client = BlobServiceClient(account_url=account_url, credential=datastore.credential)
         try:
             for local, remote in zip(local_paths, remote_paths["paths"]):
                 blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
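Artifact upload now goes straight through azure-storage-blob using the datastore returned by `LiteMLClient`; a hedged sketch of that upload loop, with placeholder account, container, and credential values standing in for the datastore fields resolved at runtime:

```python
from azure.storage.blob import BlobServiceClient

# Placeholder values; in _eval_run.py these come from the default workspace datastore.
account_url = "https://<storage-account>.blob.core.windows.net"
credential = "<datastore-credential-or-token>"
container_name = "<workspace-blobstore-container>"

local_and_remote_paths = [("./artifacts/instance_results.jsonl", "runs/<run-id>/instance_results.jsonl")]

svc_client = BlobServiceClient(account_url=account_url, credential=credential)
for local_path, remote_path in local_and_remote_paths:
    blob_client = svc_client.get_blob_client(container=container_name, blob=remote_path)
    with open(local_path, "rb") as handle:
        blob_client.upload_blob(handle)  # mirrors the per-file loop in log_artifact
```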
@@ -469,6 +450,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         # we are rewriting already registered artifact and need to skip this step.
         if self._is_promptflow_run:
             return
+
         url = (
             f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
             f"/resourceGroups/{self._resource_group_name}/providers/"

@@ -491,15 +473,28 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         if response.status_code != 200:
             self._log_warning("register artifact", response)

-
-
-
-
-
-
-
-
-
+        # register artifacts for images if exists in image folder
+        try:
+            for remote_path in remote_paths["paths"]:
+                remote_file_path = remote_path["path"]
+                if "images" in os.path.normpath(remote_file_path).split(os.sep):
+                    response = self.request_with_retry(
+                        url=url,
+                        method="POST",
+                        json_dict={
+                            "origin": "ExperimentRun",
+                            "container": f"dcid.{self.info.run_id}",
+                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
+                            "dataPath": {
+                                "dataStoreName": datastore.name,
+                                "relativePath": remote_file_path,
+                            },
+                        },
+                    )
+                    if response.status_code != 200:
+                        self._log_warning("register image artifact", response)
+        except Exception as ex:  # pylint: disable=broad-exception-caught
+            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)

     def log_metric(self, key: str, value: float) -> None:
         """
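The new image-artifact registration only fires for files that sit under an images/ folder in the remote path; the check itself is just a path-segment test, shown standalone here with a made-up remote path:

```python
import os
import posixpath


def is_image_artifact(remote_file_path: str) -> bool:
    # Same test as above: treat the file as an image artifact if any path segment is "images".
    return "images" in os.path.normpath(remote_file_path).split(os.sep)


remote = "runs/abc/images/chart_0.png"  # fabricated remote path
print(is_image_artifact(remote))                           # True
print(posixpath.join("images", os.path.basename(remote)))  # images/chart_0.png
```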