PyPI - azure-ai-evaluation - Versions diffs - 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136) hide show

azure/ai/evaluation/__init__.py CHANGED Viewed

@@ -45,6 +45,7 @@ from ._aoai.aoai_grader import AzureOpenAIGrader
 from ._aoai.label_grader import AzureOpenAILabelGrader
 from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
 from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
+from ._aoai.score_model_grader import AzureOpenAIScoreModelGrader
 _patch_all = []
@@ -54,10 +55,19 @@ _patch_all = []
 # in ai.projects. So we only import it if it's available and the user has ai.projects.
 try:
     from ._converters._ai_services import AIAgentConverter
     _patch_all.append("AIAgentConverter")
 except ImportError:
-    print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
+    print(
+        "[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`."
+    )
+try:
+    from ._converters._sk_services import SKAgentConverter
+    _patch_all.append("SKAgentConverter")
+except ImportError:
+    print("[INFO] Could not import SKAgentConverter. Please install the dependency with `pip install semantic-kernel`.")
 __all__ = [
     "evaluate",
@@ -99,6 +109,7 @@ __all__ = [
     "AzureOpenAILabelGrader",
     "AzureOpenAIStringCheckGrader",
     "AzureOpenAITextSimilarityGrader",
+    "AzureOpenAIScoreModelGrader",
 ]
-__all__.extend([p for p in _patch_all if p not in __all__])
+__all__.extend([p for p in _patch_all if p not in __all__])

azure/ai/evaluation/_aoai/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ from .aoai_grader import AzureOpenAIGrader
 __all__ = [
     "AzureOpenAIGrader",
-]
+]

azure/ai/evaluation/_aoai/aoai_grader.py CHANGED Viewed

@@ -5,12 +5,13 @@ from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfigurat
 from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+from azure.ai.evaluation._user_agent import UserAgentSingleton
 from typing import Any, Dict, Union
 from azure.ai.evaluation._common._experimental import experimental
 @experimental
-class AzureOpenAIGrader():
+class AzureOpenAIGrader:
     """
     Base class for Azure OpenAI grader wrappers, recommended only for use by experienced OpenAI API users.
     Combines a model configuration and any grader configuration
@@ -35,9 +36,15 @@ class AzureOpenAIGrader():
     """
-    id = "aoai://general"
+    id = "azureai://built-in/evaluators/azure-openai/custom_grader"
-    def __init__(self, *, model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], grader_config: Dict[str, Any], **kwargs: Any):
+    def __init__(
+        self,
+        *,
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        grader_config: Dict[str, Any],
+        **kwargs: Any,
+    ):
         self._model_config = model_config
         self._grader_config = grader_config
@@ -45,8 +52,6 @@ class AzureOpenAIGrader():
             self._validate_model_config()
             self._validate_grader_config()
     def _validate_model_config(self) -> None:
         """Validate the model configuration that this grader wrapper is using."""
         if "api_key" not in self._model_config or not self._model_config.get("api_key"):
@@ -57,7 +62,7 @@ class AzureOpenAIGrader():
                 category=ErrorCategory.INVALID_VALUE,
                 target=ErrorTarget.AOAI_GRADER,
             )
     def _validate_grader_config(self) -> None:
         """Validate the grader configuration that this grader wrapper is using."""
@@ -71,19 +76,24 @@ class AzureOpenAIGrader():
         :return: The OpenAI client.
         :rtype: [~openai.OpenAI, ~openai.AzureOpenAI]
         """
+        default_headers = {"User-Agent": UserAgentSingleton().value}
         if "azure_endpoint" in self._model_config:
-           from openai import AzureOpenAI
-           # TODO set default values?
-           return AzureOpenAI(
+            from openai import AzureOpenAI
+            # TODO set default values?
+            return AzureOpenAI(
                 azure_endpoint=self._model_config["azure_endpoint"],
-                api_key=self._model_config.get("api_key", None), # Default-style access to appease linters.
-                api_version=DEFAULT_AOAI_API_VERSION, # Force a known working version
+                api_key=self._model_config.get("api_key", None),  # Default-style access to appease linters.
+                api_version=DEFAULT_AOAI_API_VERSION,  # Force a known working version
                 azure_deployment=self._model_config.get("azure_deployment", ""),
+                default_headers=default_headers,
             )
         from openai import OpenAI
         # TODO add default values for base_url and organization?
         return OpenAI(
             api_key=self._model_config["api_key"],
             base_url=self._model_config.get("base_url", ""),
             organization=self._model_config.get("organization", ""),
+            default_headers=default_headers,
         )

azure/ai/evaluation/_aoai/label_grader.py CHANGED Viewed

@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from .aoai_grader import AzureOpenAIGrader
 @experimental
 class AzureOpenAILabelGrader(AzureOpenAIGrader):
     """
@@ -42,12 +43,12 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
     """
-    id = "aoai://label_model"
+    id = "azureai://built-in/evaluators/azure-openai/label_grader"
     def __init__(
         self,
         *,
-        model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
         input: List[Dict[str, str]],
         labels: List[str],
         model: str,

azure/ai/evaluation/_aoai/score_model_grader.py ADDED Viewed

@@ -0,0 +1,90 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from typing import Any, Dict, Union, List, Optional
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+from openai.types.graders import ScoreModelGrader
+from azure.ai.evaluation._common._experimental import experimental
+from .aoai_grader import AzureOpenAIGrader
+@experimental
+class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
+    """
+    Wrapper class for OpenAI's score model graders.
+    Enables continuous scoring evaluation with custom prompts and flexible
+    conversation-style inputs. Supports configurable score ranges and
+    pass thresholds for binary classification.
+    Supplying a ScoreModelGrader to the `evaluate` method will cause an
+    asynchronous request to evaluate the grader via the OpenAI API. The
+    results of the evaluation will then be merged into the standard
+    evaluation results.
+    :param model_config: The model configuration to use for the grader.
+    :type model_config: Union[
+        ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
+        ~azure.ai.evaluation.OpenAIModelConfiguration
+    ]
+    :param input: The input messages for the grader. List of conversation
+        messages with role and content.
+    :type input: List[Dict[str, str]]
+    :param model: The model to use for the evaluation.
+    :type model: str
+    :param name: The name of the grader.
+    :type name: str
+    :param range: The range of the score. Defaults to [0, 1].
+    :type range: Optional[List[float]]
+    :param pass_threshold: Score threshold for pass/fail classification.
+        Defaults to midpoint of range.
+    :type pass_threshold: Optional[float]
+    :param sampling_params: The sampling parameters for the model.
+    :type sampling_params: Optional[Dict[str, Any]]
+    :param kwargs: Additional keyword arguments to pass to the grader.
+    :type kwargs: Any
+    """
+    id = "azureai://built-in/evaluators/azure-openai/score_model_grader"
+    def __init__(
+        self,
+        *,
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        input: List[Dict[str, str]],
+        model: str,
+        name: str,
+        range: Optional[List[float]] = None,
+        pass_threshold: Optional[float] = None,
+        sampling_params: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ):
+        # Validate range and pass_threshold
+        if range is not None:
+            if len(range) != 2 or range[0] >= range[1]:
+                raise ValueError("range must be a list of two numbers [min, max] where min < max")
+        else:
+            range = [0.0, 1.0]  # Default range
+        if pass_threshold is not None:
+            if range and (pass_threshold < range[0] or pass_threshold > range[1]):
+                raise ValueError(f"pass_threshold {pass_threshold} must be within range {range}")
+        else:
+            pass_threshold = (range[0] + range[1]) / 2  # Default to midpoint
+        # Store pass_threshold as instance attribute
+        self.pass_threshold = pass_threshold
+        # Create OpenAI ScoreModelGrader instance
+        grader_kwargs = {"input": input, "model": model, "name": name, "type": "score_model"}
+        if range is not None:
+            grader_kwargs["range"] = range
+        if sampling_params is not None:
+            grader_kwargs["sampling_params"] = sampling_params
+        grader = ScoreModelGrader(**grader_kwargs)
+        super().__init__(model_config=model_config, grader_config=grader, **kwargs)

azure/ai/evaluation/_aoai/string_check_grader.py CHANGED Viewed

@@ -10,6 +10,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from .aoai_grader import AzureOpenAIGrader
 @experimental
 class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
     """
@@ -38,12 +39,12 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
     """
-    id = "aoai://string_check"
+    id = "azureai://built-in/evaluators/azure-openai/string_check_grader"
     def __init__(
         self,
         *,
-        model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
         input: str,
         name: str,
         operation: Literal[

azure/ai/evaluation/_aoai/text_similarity_grader.py CHANGED Viewed

@@ -10,6 +10,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from .aoai_grader import AzureOpenAIGrader
 @experimental
 class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
     """
@@ -52,12 +53,12 @@ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
     """
-    id = "aoai://text_similarity"
+    id = "azureai://built-in/evaluators/azure-openai/text_similarity_grader"
     def __init__(
         self,
         *,
-        model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
         evaluation_metric: Literal[
             "fuzzy_match",
             "bleu",

azure/ai/evaluation/_azure/_envs.py CHANGED Viewed

@@ -19,6 +19,7 @@ from azure.core.pipeline.policies import ProxyPolicy, AsyncRetryPolicy
 class AzureEnvironmentMetadata(TypedDict):
     """Configuration for various Azure environments. All endpoints include a trailing slash."""
     portal_endpoint: str
     """The management portal for the Azure environment (e.g. https://portal.azure.com/)"""
     resource_manager_endpoint: str
@@ -107,15 +108,15 @@ class AzureEnvironmentClient:
         def case_insensitive_match(d: Mapping[str, Any], key: str) -> Optional[Any]:
             key = key.strip().lower()
-            return next((v for k,v in d.items() if k.strip().lower() == key), None)
+            return next((v for k, v in d.items() if k.strip().lower() == key), None)
         async with _ASYNC_LOCK:
             cloud = _KNOWN_AZURE_ENVIRONMENTS.get(name) or case_insensitive_match(_KNOWN_AZURE_ENVIRONMENTS, name)
             if cloud:
                 return cloud
-            default_endpoint = (_KNOWN_AZURE_ENVIRONMENTS
-                .get(_DEFAULT_AZURE_ENV_NAME, {})
-                .get("resource_manager_endpoint"))
+            default_endpoint = _KNOWN_AZURE_ENVIRONMENTS.get(_DEFAULT_AZURE_ENV_NAME, {}).get(
+                "resource_manager_endpoint"
+            )
         metadata_url = self.get_default_metadata_url(default_endpoint)
         clouds = await self.get_clouds_async(metadata_url=metadata_url, update_cached=update_cached)
@@ -124,10 +125,7 @@ class AzureEnvironmentClient:
         return cloud_metadata
     async def get_clouds_async(
-        self,
-        *,
-        metadata_url: Optional[str] = None,
-        update_cached: bool = True
+        self, *, metadata_url: Optional[str] = None, update_cached: bool = True
     ) -> Mapping[str, AzureEnvironmentMetadata]:
         metadata_url = metadata_url or self.get_default_metadata_url()
@@ -149,7 +147,8 @@ class AzureEnvironmentClient:
         default_endpoint = default_endpoint or "https://management.azure.com/"
         metadata_url = os.getenv(
             _ENV_ARM_CLOUD_METADATA_URL,
-            f"{default_endpoint}metadata/endpoints?api-version={AzureEnvironmentClient.DEFAULT_API_VERSION}")
+            f"{default_endpoint}metadata/endpoints?api-version={AzureEnvironmentClient.DEFAULT_API_VERSION}",
+        )
         return metadata_url
     @staticmethod
@@ -197,7 +196,7 @@ class AzureEnvironmentClient:
 def recursive_update(d: Dict, u: Mapping) -> None:
     """Recursively update a dictionary.
     :param Dict d: The dictionary to update.
     :param Mapping u: The mapping to update from.
     """

azure/ai/evaluation/_azure/_token_manager.py CHANGED Viewed

@@ -73,7 +73,13 @@ class AzureMLTokenManager(APITokenManager):
             return super().get_aad_credential()
     def get_token(
-            self, scopes = None, claims: Union[str, None] = None, tenant_id: Union[str, None] = None, enable_cae: bool = False, **kwargs: Any) -> AccessToken:
+        self,
+        scopes=None,
+        claims: Union[str, None] = None,
+        tenant_id: Union[str, None] = None,
+        enable_cae: bool = False,
+        **kwargs: Any
+    ) -> AccessToken:
         """Get the API token. If the token is not available or has expired, refresh the token.
         :return: API token

azure/ai/evaluation/_common/constants.py CHANGED Viewed

@@ -5,8 +5,17 @@ from enum import Enum
 from azure.core import CaseInsensitiveEnumMeta
-PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency", "intent_resolution",
-                                  "tool_call_accurate", "response_completeness", "task_adherence"]
+PROMPT_BASED_REASON_EVALUATORS = [
+    "coherence",
+    "relevance",
+    "retrieval",
+    "groundedness",
+    "fluency",
+    "intent_resolution",
+    "tool_call_accurate",
+    "response_completeness",
+    "task_adherence",
+]
 class CommonConstants:

azure/ai/evaluation/_common/evaluation_onedp_client.py CHANGED Viewed

@@ -6,13 +6,22 @@ import logging
 from typing import Union, Any, Dict
 from azure.core.credentials import AzureKeyCredential, TokenCredential
 from azure.ai.evaluation._common.onedp import AIProjectClient as RestEvaluationServiceClient
-from azure.ai.evaluation._common.onedp.models import (PendingUploadRequest, PendingUploadType, EvaluationResult,
-                                                      ResultType, AssetCredentialRequest, EvaluationUpload, InputDataset, RedTeamUpload)
+from azure.ai.evaluation._common.onedp.models import (
+    PendingUploadRequest,
+    PendingUploadType,
+    EvaluationResult,
+    ResultType,
+    AssetCredentialRequest,
+    EvaluationUpload,
+    InputDataset,
+    RedTeamUpload,
+)
 from azure.storage.blob import ContainerClient
 from .utils import upload
 LOGGER = logging.getLogger(__name__)
 class EvaluationServiceOneDPClient:
     def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
@@ -23,7 +32,15 @@ class EvaluationServiceOneDPClient:
         )
     def create_evaluation_result(
-            self, *, name: str, path: str, version=1, metrics: Dict[str, int]=None, result_type: ResultType=ResultType.EVALUATION, **kwargs) -> EvaluationResult:
+        self,
+        *,
+        name: str,
+        path: str,
+        version=1,
+        metrics: Dict[str, int] = None,
+        result_type: ResultType = ResultType.EVALUATION,
+        **kwargs,
+    ) -> EvaluationResult:
         """Create and upload evaluation results to Azure evaluation service.
         This method uploads evaluation results from a local path to Azure Blob Storage
@@ -49,17 +66,20 @@ class EvaluationServiceOneDPClient:
         :raises: Various exceptions from the underlying API calls or upload process
         """
-        LOGGER.debug(f"Creating evaluation result for {name} with version {version} type {result_type} from path {path}")
+        LOGGER.debug(
+            f"Creating evaluation result for {name} with version {version} type {result_type} from path {path}"
+        )
         start_pending_upload_response = self.rest_client.evaluation_results.start_pending_upload(
             name=name,
             version=version,
             body=PendingUploadRequest(pending_upload_type=PendingUploadType.TEMPORARY_BLOB_REFERENCE),
-            **kwargs
+            **kwargs,
         )
         LOGGER.debug(f"Uploading {path} to {start_pending_upload_response.blob_reference_for_consumption.blob_uri}")
         with ContainerClient.from_container_url(
-            start_pending_upload_response.blob_reference_for_consumption.credential.sas_uri) as container_client:
+            start_pending_upload_response.blob_reference_for_consumption.credential.sas_uri
+        ) as container_client:
             upload(path=path, container_client=container_client, logger=LOGGER)
         LOGGER.debug(f"Creating evaluation result version for {name} with version {version}")
@@ -73,7 +93,7 @@ class EvaluationServiceOneDPClient:
             ),
             name=name,
             version=version,
-            **kwargs
+            **kwargs,
         )
         return create_version_response
@@ -90,10 +110,7 @@ class EvaluationServiceOneDPClient:
         :rtype: EvaluationUpload
         :raises: Various exceptions from the underlying API calls
         """
-        upload_run_response = self.rest_client.evaluations.upload_run(
-            evaluation=evaluation,
-            **kwargs
-        )
+        upload_run_response = self.rest_client.evaluations.upload_run(evaluation=evaluation, **kwargs)
         return upload_run_response
@@ -112,11 +129,7 @@ class EvaluationServiceOneDPClient:
         :rtype: EvaluationUpload
         :raises: Various exceptions from the underlying API calls
         """
-        update_run_response = self.rest_client.evaluations.upload_update_run(
-            name=name,
-            evaluation=evaluation,
-            **kwargs
-        )
+        update_run_response = self.rest_client.evaluations.upload_update_run(name=name, evaluation=evaluation, **kwargs)
         return update_run_response
@@ -132,10 +145,7 @@ class EvaluationServiceOneDPClient:
         :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
         :raises: Various exceptions from the underlying API calls
         """
-        upload_run_response = self.rest_client.red_teams.upload_run(
-            redteam=red_team,
-            **kwargs
-        )
+        upload_run_response = self.rest_client.red_teams.upload_run(redteam=red_team, **kwargs)
         return upload_run_response
@@ -154,10 +164,6 @@ class EvaluationServiceOneDPClient:
         :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
         :raises: Various exceptions from the underlying API calls
         """
-        update_run_response = self.rest_client.red_teams.upload_update_run(
-            name=name,
-            redteam=red_team,
-            **kwargs
-        )
+        update_run_response = self.rest_client.red_teams.upload_update_run(name=name, redteam=red_team, **kwargs)
-        return update_run_response
+        return update_run_response

azure/ai/evaluation/_common/onedp/__init__.py CHANGED Viewed

@@ -1,32 +1,32 @@
-# coding=utf-8
-# --------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# Code generated by Microsoft (R) Python Code Generator.
-# Changes may cause incorrect behavior and will be lost if the code is regenerated.
-# --------------------------------------------------------------------------
-# pylint: disable=wrong-import-position
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
-    from ._patch import *  # pylint: disable=unused-wildcard-import
-from ._client import AIProjectClient  # type: ignore
-from ._version import VERSION
-__version__ = VERSION
-try:
-    from ._patch import __all__ as _patch_all
-    from ._patch import *
-except ImportError:
-    _patch_all = []
-from ._patch import patch_sdk as _patch_sdk
-__all__ = [
-    "AIProjectClient",
-]
-__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
-_patch_sdk()
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+from ._client import AIProjectClient  # type: ignore
+from ._version import VERSION
+__version__ = VERSION
+try:
+    from ._patch import __all__ as _patch_all
+    from ._patch import *
+except ImportError:
+    _patch_all = []
+from ._patch import patch_sdk as _patch_sdk
+__all__ = [
+    "AIProjectClient",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+_patch_sdk()

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl