azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
Files changed (83)
  1. azure/ai/evaluation/__init__.py +22 -0
  2. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +4 -0
  3. azure/ai/evaluation/_common/constants.py +5 -0
  4. azure/ai/evaluation/_common/math.py +73 -2
  5. azure/ai/evaluation/_common/rai_service.py +250 -62
  6. azure/ai/evaluation/_common/utils.py +196 -23
  7. azure/ai/evaluation/_constants.py +7 -6
  8. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
  9. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +13 -4
  10. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +19 -6
  11. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  12. azure/ai/evaluation/_evaluate/_eval_run.py +55 -14
  13. azure/ai/evaluation/_evaluate/_evaluate.py +312 -228
  14. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +7 -6
  15. azure/ai/evaluation/_evaluate/_utils.py +46 -11
  16. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +17 -18
  17. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +67 -31
  18. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
  19. azure/ai/evaluation/_evaluators/_common/_base_eval.py +37 -24
  20. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +21 -9
  21. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +52 -16
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +91 -48
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +100 -26
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +94 -26
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +96 -26
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +97 -26
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +31 -4
  28. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
  29. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +67 -36
  30. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
  31. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +14 -16
  32. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +106 -34
  33. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  34. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  35. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +20 -27
  36. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  37. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  38. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  39. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  40. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  41. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  42. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  43. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  44. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +87 -31
  45. azure/ai/evaluation/_evaluators/_qa/_qa.py +23 -31
  46. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +72 -36
  47. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
  48. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +83 -125
  49. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
  50. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +26 -27
  51. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  52. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  53. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +37 -28
  54. azure/ai/evaluation/_evaluators/_xpia/xpia.py +94 -33
  55. azure/ai/evaluation/_exceptions.py +19 -0
  56. azure/ai/evaluation/_model_configurations.py +83 -15
  57. azure/ai/evaluation/_version.py +1 -1
  58. azure/ai/evaluation/simulator/__init__.py +2 -1
  59. azure/ai/evaluation/simulator/_adversarial_scenario.py +20 -1
  60. azure/ai/evaluation/simulator/_adversarial_simulator.py +29 -35
  61. azure/ai/evaluation/simulator/_constants.py +11 -1
  62. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  63. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  64. azure/ai/evaluation/simulator/_direct_attack_simulator.py +17 -9
  65. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  66. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
  67. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +90 -35
  68. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +4 -2
  69. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +8 -4
  70. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
  71. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
  72. azure/ai/evaluation/simulator/_simulator.py +165 -105
  73. azure/ai/evaluation/simulator/_utils.py +31 -13
  74. azure_ai_evaluation-1.0.1.dist-info/METADATA +600 -0
  75. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/NOTICE.txt +20 -0
  76. azure_ai_evaluation-1.0.1.dist-info/RECORD +119 -0
  77. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/WHEEL +1 -1
  78. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
  79. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
  80. azure_ai_evaluation-1.0.0b4.dist-info/METADATA +0 -535
  81. azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
  82. /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
  83. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_common/utils.py

@@ -2,13 +2,14 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

+import re
 import math
 import threading
-from typing import Any, List, Literal, Mapping, Type, TypeVar, Union, cast, get_args, get_origin
+from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin

 import nltk
 from typing_extensions import NotRequired, Required, TypeGuard
-
+from promptflow.core._errors import MissingRequiredPackage
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import (
@@ -24,7 +25,7 @@ _nltk_data_download_lock = threading.Lock()
 T_TypedDict = TypeVar("T_TypedDict", bound=Mapping[Any, Any])


-def get_harm_severity_level(harm_score: int) -> Union[str, float]:
+def get_harm_severity_level(harm_score: Union[float, int]) -> Union[str, float]:
     """Generate harm severity level based on harm score.

     :param harm_score: The harm score to be evaluated.
@@ -48,13 +49,19 @@ def get_harm_severity_level(harm_score: int) -> Union[str, float]:

 def ensure_nltk_data_downloaded():
     """Download NLTK data packages if not already downloaded."""
+    nltk_data = [
+        ("wordnet", "corpora/wordnet.zip"),
+        ("perluniprops", "misc/perluniprops.zip"),
+        ("punkt", "tokenizers/punkt.zip"),
+        ("punkt_tab", "tokenizers/punkt_tab.zip"),
+    ]
+
     with _nltk_data_download_lock:
-        try:
-            from nltk.tokenize.nist import NISTTokenizer  # pylint: disable=unused-import
-        except LookupError:
-            nltk.download("perluniprops")
-            nltk.download("punkt")
-            nltk.download("punkt_tab")
+        for _id, resource_name in nltk_data:
+            try:
+                nltk.find(resource_name)
+            except LookupError:
+                nltk.download(_id)


 def nltk_tokenize(text: str) -> List[str]:
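
The rewritten ensure_nltk_data_downloaded probes for each resource before fetching it, so repeated runs skip the network call. A minimal sketch of the same check-before-download pattern (the helper name is illustrative; nltk.find raises LookupError when a resource is absent):

    import nltk

    def ensure_resource(package_id: str, resource_path: str) -> None:
        # nltk.find succeeds silently when the resource is already on disk;
        # only on LookupError do we fetch the package.
        try:
            nltk.find(resource_path)   # e.g. "tokenizers/punkt.zip"
        except LookupError:
            nltk.download(package_id)  # e.g. "punkt"
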
@@ -122,24 +129,23 @@ def validate_azure_ai_project(o: object) -> AzureAIProject:
     fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}

     if not isinstance(o, dict):
-        msg = "azure_ai_project must be a dictionary"
+        msg = "The 'azure_ai_project' parameter must be a dictionary."
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )

     missing_fields = set(fields.keys()) - o.keys()

     if missing_fields:
-        msg = "azure_ai_project must contain keys: " + ", ".join(f'"{field}"' for field in missing_fields)
+        msg = (
+            "The 'azure_ai_project' dictionary is missing the following required "
+            f"field(s): {', '.join(f'{field}' for field in missing_fields)}."
+        )
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )

@@ -147,13 +153,10 @@ def validate_azure_ai_project(o: object) -> AzureAIProject:
         if isinstance(o[field_name], expected_type):
             continue

-        msg = f"Expected azure_ai_project field {field_name!r} to be of type {expected_type}."
-
+        msg = f"Invalid type for field '{field_name}'. Expected {expected_type}, but got {type(o[field_name])}."
         raise EvaluationException(
-            message=f"{msg}. Got {type(o[field_name])}.",
-            internal_message=msg,
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            message=msg,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )

@@ -270,3 +273,173 @@ def _validate_typed_dict(o: object, t: Type[T_TypedDict]) -> T_TypedDict:
             validate_annotation(v, annotations[k])

     return cast(T_TypedDict, o)
+
+
+def parse_quality_evaluator_reason_score(llm_output: str) -> Tuple[float, str]:
+    """Parse the output of prompt-based quality evaluators that return a score and reason.
+
+    Current supported evaluators:
+        - Fluency
+        - Relevance
+        - Retrieval
+        - Groundedness
+        - Coherence
+
+    :param llm_output: The output of the prompt-based quality evaluator.
+    :type llm_output: str
+    :return: The score and reason.
+    :rtype: Tuple[float, str]
+    """
+    score = math.nan
+    reason = ""
+    if llm_output:
+        try:
+            score_pattern = r"<S2>\D*?([1-5]).*?</S2>"
+            reason_pattern = r"<S1>(.*?)</S1>"
+            score_match = re.findall(score_pattern, llm_output, re.DOTALL)
+            reason_match = re.findall(reason_pattern, llm_output, re.DOTALL)
+            if score_match:
+                score = float(score_match[0].strip())
+            if reason_match:
+                reason = reason_match[0].strip()
+        except ValueError as exc:
+            raise EvaluationException(
+                message=f"Failed to parse model output: \n{llm_output}",
+                internal_message="Failed to parse model output.",
+                category=ErrorCategory.FAILED_EXECUTION,
+                blame=ErrorBlame.SYSTEM_ERROR,
+            ) from exc
+
+    return score, reason
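
The updated quality prompty files evidently have the model emit its reasoning inside <S1> tags and an integer score from 1 to 5 inside <S2> tags, which is what the two regexes extract. A hypothetical input/output pair:

    sample = "<S1>The response reads smoothly and stays on topic.</S1><S2>4</S2>"
    score, reason = parse_quality_evaluator_reason_score(sample)
    # score == 4.0
    # reason == "The response reads smoothly and stays on topic."
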
+
+
+def remove_optional_singletons(eval_class, singletons):
+    required_singletons = singletons.copy()
+    if hasattr(eval_class, "_OPTIONAL_PARAMS"):  # pylint: disable=protected-access
+        for param in eval_class._OPTIONAL_PARAMS:  # pylint: disable=protected-access
+            if param in singletons:
+                del required_singletons[param]
+    return required_singletons
+
+
+def retrieve_content_type(assistant_messages: List, metric: str) -> str:
+    """Get the content type for service payload.
+
+    :param assistant_messages: The list of messages to be annotated by evaluation service
+    :type assistant_messages: list
+    :param metric: A string representing the metric type
+    :type metric: str
+    :return: A text representing the content type. Example: 'text', or 'image'
+    :rtype: str
+    """
+    # Check if metric is "protected_material"
+    if metric == "protected_material":
+        return "image"
+
+    # Iterate through each message
+    for message in assistant_messages:
+        # Ensure "content" exists in the message and is iterable
+        if isinstance(message.get("content", []), list):
+            for content in message.get("content", []):
+                if content.get("type") == "image_url":
+                    return "image"
+    # Default return if no image was found
+    return "text"
+
+
+def validate_conversation(conversation):
+    def raise_exception(msg, target):
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=target,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+    if not conversation or "messages" not in conversation:
+        raise_exception(
+            "Attribute 'messages' is missing in the request",
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
+        )
+    messages = conversation["messages"]
+    if not isinstance(messages, list):
+        raise_exception(
+            "'messages' parameter must be a JSON-compatible list of chat messages",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    expected_roles = {"user", "assistant", "system"}
+    image_found = False
+    assistant_message_count = 0
+    user_message_count = 0
+    for num, message in enumerate(messages, 1):
+        if not isinstance(message, dict):
+            try:
+                from azure.ai.inference.models import (
+                    ChatRequestMessage,
+                    UserMessage,
+                    AssistantMessage,
+                    SystemMessage,
+                    ImageContentItem,
+                )
+            except ImportError as ex:
+                raise MissingRequiredPackage(
+                    message="Please install 'azure-ai-inference' package to use SystemMessage, "
+                    "UserMessage or AssistantMessage."
+                ) from ex
+
+            if isinstance(message, ChatRequestMessage) and not isinstance(
+                message, (UserMessage, AssistantMessage, SystemMessage)
+            ):
+                raise_exception(
+                    f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
+                    ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+                )
+            if isinstance(message, AssistantMessage):
+                assistant_message_count += 1
+            if isinstance(message, UserMessage):
+                user_message_count += 1
+            if isinstance(message.content, list) and any(
+                isinstance(item, ImageContentItem) for item in message.content
+            ):
+                image_found = True
+            continue
+        if message.get("role") not in expected_roles:
+            raise_exception(
+                f"Invalid role provided: {message.get('role')}. Message number: {num}",
+                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            )
+        if message.get("role") == "assistant":
+            assistant_message_count += 1
+        if message.get("role") == "user":
+            user_message_count += 1
+        content = message.get("content")
+        if not isinstance(content, (str, list)):
+            raise_exception(
+                f"Content in each turn must be a string or array. Message number: {num}",
+                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            )
+        if isinstance(content, list):
+            if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
+                image_found = True
+    if not image_found:
+        raise_exception(
+            "Message needs to have multi-modal input like images.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    if assistant_message_count == 0:
+        raise_exception(
+            "Assistant role required in one of the messages.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    if user_message_count == 0:
+        raise_exception(
+            "User role required in one of the messages.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    if assistant_message_count > 1:
+        raise_exception(
+            "Evaluators for multimodal conversations only support single turn. "
+            "User and assistant role expected as the only role in each message.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
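
For reference, a hypothetical single-turn multimodal conversation that satisfies all of these checks: exactly one user message carrying an image_url content item and one assistant reply.

    conversation = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this picture?"},
                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                ],
            },
            {"role": "assistant", "content": "A cat sitting on a windowsill."},
        ]
    }
    validate_conversation(conversation)  # returns None; raises EvaluationException on invalid input
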
azure/ai/evaluation/_constants.py

@@ -7,13 +7,13 @@ from typing import Literal
 class EvaluationMetrics:
     """Metrics for model evaluation."""

-    GPT_GROUNDEDNESS = "gpt_groundedness"
-    GPT_RELEVANCE = "gpt_relevance"
-    GPT_COHERENCE = "gpt_coherence"
-    GPT_FLUENCY = "gpt_fluency"
-    GPT_SIMILARITY = "gpt_similarity"
+    GROUNDEDNESS = "groundedness"
+    RELEVANCE = "relevance"
+    COHERENCE = "coherence"
+    FLUENCY = "fluency"
+    SIMILARITY = "similarity"
     F1_SCORE = "f1_score"
-    GPT_RETRIEVAL_SCORE = "gpt_retrieval_score"
+    RETRIEVAL_SCORE = "retrieval_score"
     HATE_FAIRNESS = "hate_fairness"
     HATE_UNFAIRNESS = "hate_unfairness"
     VIOLENCE = "violence"
@@ -62,6 +62,7 @@ CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4

 PF_BATCH_TIMEOUT_SEC_DEFAULT = 3600
 PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC"
+PF_DISABLE_TRACING = "PF_DISABLE_TRACING"

 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT = "OTEL_EXPORTER_OTLP_TRACES_TIMEOUT"
 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT = 60
azure/ai/evaluation/_evaluate/_batch_run/__init__.py (renamed from _batch_run_client/__init__.py)

@@ -1,8 +1,9 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from .batch_run_context import BatchRunContext
+from .eval_run_context import EvalRunContext
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
+from .target_run_context import TargetRunContext

-__all__ = ["CodeClient", "ProxyClient", "BatchRunContext"]
+__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
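
Internal imports move with the rename; a before/after sketch (these are private modules, so only code inside the package is affected):

    # 1.0.0b4
    from azure.ai.evaluation._evaluate._batch_run_client import BatchRunContext
    # 1.0.1
    from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, TargetRunContext
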
azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py (renamed from _batch_run_client/batch_run_context.py)

@@ -14,6 +14,7 @@ from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT,
     PF_BATCH_TIMEOUT_SEC,
     PF_BATCH_TIMEOUT_SEC_DEFAULT,
+    PF_DISABLE_TRACING,
 )

 from ..._user_agent import USER_AGENT
@@ -22,13 +23,13 @@ from .code_client import CodeClient
 from .proxy_client import ProxyClient


-class BatchRunContext:
-    """Context manager for batch run clients.
+class EvalRunContext:
+    """Context manager for eval batch run.

     :param client: The client to run in the context.
     :type client: Union[
-        ~azure.ai.evaluation._evaluate._batch_run_client.code_client.CodeClient,
-        ~azure.ai.evaluation._evaluate._batch_run_client.proxy_client.ProxyClient
+        ~azure.ai.evaluation._evaluate._batch_run.code_client.CodeClient,
+        ~azure.ai.evaluation._evaluate._batch_run.proxy_client.ProxyClient
     ]
     """

@@ -36,8 +37,12 @@ class BatchRunContext:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
+        self._original_cwd = os.getcwd()

     def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
+        self._original_cwd = os.getcwd()
+
         if isinstance(self.client, CodeClient):
             ClientUserAgentUtil.append_user_agent(USER_AGENT)
             inject_openai_api()
@@ -45,6 +50,7 @@
         if isinstance(self.client, ProxyClient):
             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
             os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
+            os.environ[PF_DISABLE_TRACING] = "true"

             if os.environ.get(PF_BATCH_TIMEOUT_SEC) is None:
                 os.environ[PF_BATCH_TIMEOUT_SEC] = str(PF_BATCH_TIMEOUT_SEC_DEFAULT)
@@ -64,12 +70,15 @@
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
     ) -> None:
+        os.chdir(self._original_cwd)
+
         if isinstance(self.client, CodeClient):
             recover_openai_api()

         if isinstance(self.client, ProxyClient):
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
             os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
+            os.environ.pop(PF_DISABLE_TRACING, None)

             if self._is_batch_timeout_set_by_system:
                 os.environ.pop(PF_BATCH_TIMEOUT_SEC, None)
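
Both EvalRunContext and the new TargetRunContext (below) now capture and restore the working directory, since promptflow may chdir without restoring it. Stripped to its essentials, the pattern is (the class name here is illustrative):

    import os

    class CwdPreserving:
        def __enter__(self):
            # Capture the working directory on entry ...
            self._original_cwd = os.getcwd()

        def __exit__(self, exc_type, exc_value, exc_tb):
            # ... and restore it on exit, whatever the wrapped run did.
            os.chdir(self._original_cwd)
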
azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py (moved from _batch_run_client/)

@@ -1,13 +1,16 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
 import inspect
 import logging
 import math
 import os
+from collections import OrderedDict
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union
-from collections import OrderedDict

 import pandas as pd
 from promptflow.client import PFClient
@@ -37,7 +40,7 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         **kwargs
     ) -> ProxyRun:
         flow_to_run = flow
-        if hasattr(flow, "_to_async"):
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access

         batch_use_async = self._should_batch_use_async(flow_to_run)
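
The async conversion is now gated on the AI_EVALS_BATCH_USE_ASYNC environment variable (renamed from PF_EVALS_BATCH_USE_ASYNC, as the next hunk shows), so the synchronous path can be selected explicitly:

    import os
    os.environ["AI_EVALS_BATCH_USE_ASYNC"] = "false"  # skip _to_async(); run the flow synchronously
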
@@ -65,19 +68,29 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         run = proxy_run.run.result()

         # pylint: disable=protected-access
+        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
+        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
+
+        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
+        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
+            status = "Completed with Errors"
+        else:
+            status = run.status
+
+        # Return the ordered dictionary with the updated status
         return OrderedDict(
             [
-                ("status", run.status),
+                ("status", status),
                 ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")),
-                ("failed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")),
+                ("completed_lines", completed_lines),
+                ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )

     @staticmethod
     def _should_batch_use_async(flow):
-        if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
             if inspect.iscoroutinefunction(flow):
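
The net effect of the status rewrite, in isolation (values are illustrative):

    failed_lines = "2"        # read from run._properties system metrics; "NA" when absent
    run_status = "Completed"  # status reported by promptflow
    status = (
        "Completed with Errors"
        if run_status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0
        else run_status
    )
    # status == "Completed with Errors"
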
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py (new file)

@@ -0,0 +1,46 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+import types
+from typing import Optional, Type
+
+from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._constants import PF_DISABLE_TRACING
+
+
+class TargetRunContext:
+    """Context manager for target batch run.
+
+    :param upload_snapshot: Whether to upload target snapshot.
+    :type upload_snapshot: bool
+    """
+
+    def __init__(self, upload_snapshot: bool) -> None:
+        self._upload_snapshot = upload_snapshot
+        self._original_cwd = os.getcwd()
+
+    def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
+        self._original_cwd = os.getcwd()
+
+        # Address "[WinError 32] The process cannot access the file" error,
+        # caused by conflicts when the venv and target function are in the same directory.
+        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+        if not self._upload_snapshot:
+            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+        os.environ[PF_DISABLE_TRACING] = "true"
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        os.chdir(self._original_cwd)
+
+        if not self._upload_snapshot:
+            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
+
+        os.environ.pop(PF_DISABLE_TRACING, None)
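
A hedged usage sketch of the new context manager (presumably how _evaluate wraps target runs; the body is elided):

    with TargetRunContext(upload_snapshot=False):
        # Tracing is disabled and only the flex entry file (flow.flex.yaml)
        # is uploaded; both environment toggles are removed on exit.
        ...
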
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -21,6 +21,7 @@ from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError

 LOGGER = logging.getLogger(__name__)

@@ -33,14 +34,15 @@ try:
     from azure.ai.ml.entities._datastore.datastore import Datastore
     from azure.storage.blob import BlobServiceClient
 except (ModuleNotFoundError, ImportError):
-    # If the above mentioned modules cannot be imported, we are running
-    # in local mode and MLClient in the constructor will be None, so
-    # we will not arrive to Azure-dependent code.
-
-    # We are logging the import failure only if debug logging level is set because:
-    # - If the project configuration was not provided this import is not needed.
-    # - If the project configuration was provided, the error will be raised by PFClient.
-    LOGGER.debug("promptflow.azure is not installed.")
+    raise EvaluationException(  # pylint: disable=raise-missing-from
+        message=(
+            "The required packages for remote tracking are missing.\n"
+            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
+        ),
+        target=ErrorTarget.EVALUATE,
+        category=ErrorCategory.MISSING_PACKAGE,
+        blame=ErrorBlame.USER_ERROR,
+    )


 @dataclasses.dataclass
@@ -102,7 +104,6 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
     _SCOPE = "https://management.azure.com/.default"

     EVALUATION_ARTIFACT = "instance_results.jsonl"
-    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"

     def __init__(
         self,
@@ -412,7 +413,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         """
         if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
-        # Check if artifact dirrectory is empty or does not exist.
+        # Check if artifact directory is empty or does not exist.
         if not os.path.isdir(artifact_folder):
             LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
             return
@@ -443,15 +444,32 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-        for local, remote in zip(local_paths, remote_paths["paths"]):
-            blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-            with open(local, "rb") as fp:
-                blob_client.upload_blob(fp, overwrite=True)
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex

         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.
         if self._is_promptflow_run:
             return
+
         url = (
             f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
             f"/resourceGroups/{self._resource_group_name}/providers/"
@@ -474,6 +492,29 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         if response.status_code != 200:
             self._log_warning("register artifact", response)

+        # register artifacts for images if exists in image folder
+        try:
+            for remote_path in remote_paths["paths"]:
+                remote_file_path = remote_path["path"]
+                if "images" in os.path.normpath(remote_file_path).split(os.sep):
+                    response = self.request_with_retry(
+                        url=url,
+                        method="POST",
+                        json_dict={
+                            "origin": "ExperimentRun",
+                            "container": f"dcid.{self.info.run_id}",
+                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
+                            "dataPath": {
+                                "dataStoreName": datastore.name,
+                                "relativePath": remote_file_path,
+                            },
+                        },
+                    )
+                    if response.status_code != 200:
+                        self._log_warning("register image artifact", response)
+        except Exception as ex:  # pylint: disable=broad-exception-caught
+            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
+
     def _get_datastore_credential(self, datastore: "Datastore"):
         # Reference the logic in azure.ai.ml._artifact._artifact_utilities
         # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103
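
The "images" membership test above operates on normalized path components, e.g.:

    import os
    os.path.normpath("images/0001.png").split(os.sep)  # ['images', '0001.png'] on POSIX
    # Any blob whose relative path contains an "images" directory is registered as an image artifact.
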