PyPI - azure-ai-evaluation - Versions diffs - 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (123)

azure/ai/evaluation/__init__.py CHANGED Viewed

@@ -17,14 +17,20 @@ from ._evaluators._fluency import FluencyEvaluator
 from ._evaluators._gleu import GleuScoreEvaluator
 from ._evaluators._groundedness import GroundednessEvaluator
 from ._evaluators._service_groundedness import GroundednessProEvaluator
+from ._evaluators._intent_resolution import IntentResolutionEvaluator
 from ._evaluators._meteor import MeteorScoreEvaluator
 from ._evaluators._protected_material import ProtectedMaterialEvaluator
 from ._evaluators._qa import QAEvaluator
+from ._evaluators._response_completeness import ResponseCompletenessEvaluator
+from ._evaluators._task_adherence import TaskAdherenceEvaluator
 from ._evaluators._relevance import RelevanceEvaluator
 from ._evaluators._retrieval import RetrievalEvaluator
 from ._evaluators._rouge import RougeScoreEvaluator, RougeType
 from ._evaluators._similarity import SimilarityEvaluator
 from ._evaluators._xpia import IndirectAttackEvaluator
+from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
+from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
+from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
@@ -35,6 +41,34 @@ from ._model_configurations import (
     OpenAIModelConfiguration,
 )
+_patch_all = []
+# The converter from the AI service to the evaluator schema requires a dependency on
+# ai.projects, but we also don't want to force users installing ai.evaluations to pull
+# in ai.projects. So we only import it if it's available and the user has ai.projects.
+try:
+    from ._converters._ai_services import AIAgentConverter
+    _patch_all.append("AIAgentConverter")
+except ImportError:
+    print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
+# RedTeam requires a dependency on pyrit, but python 3.9 is not supported by pyrit.
+# So we only import it if it's available and the user has pyrit.
+try:
+    from ._red_team._red_team import RedTeam
+    from ._red_team._attack_strategy import AttackStrategy
+    from ._red_team._attack_objective_generator import RiskCategory
+    from ._red_team._red_team_result import RedTeamOutput
+    _patch_all.extend([
+        "RedTeam",
+        "RedTeamOutput",
+        "AttackStrategy",
+        "RiskCategory",
+    ])
+except ImportError:
+    print("[INFO] Could not import RedTeam. Please install the dependency with `pip install azure-ai-evaluation[redteam]`.")
 __all__ = [
     "evaluate",
     "CoherenceEvaluator",
@@ -42,6 +76,9 @@ __all__ = [
     "FluencyEvaluator",
     "GroundednessEvaluator",
     "GroundednessProEvaluator",
+    "ResponseCompletenessEvaluator",
+    "TaskAdherenceEvaluator",
+    "IntentResolutionEvaluator",
     "RelevanceEvaluator",
     "SimilarityEvaluator",
     "QAEvaluator",
@@ -64,5 +101,10 @@ __all__ = [
     "EvaluatorConfig",
     "Conversation",
     "Message",
-    "EvaluationResult"
+    "EvaluationResult",
+    "CodeVulnerabilityEvaluator",
+    "UngroundedAttributesEvaluator",
+    "ToolCallAccuracyEvaluator",
 ]
+__all__.extend([p for p in _patch_all if p not in __all__])

azure/ai/evaluation/_azure/_models.py CHANGED Viewed

@@ -52,7 +52,7 @@ class Workspace(Model):
         "agents_endpoint_uri": {"readonly": True},
         "ml_flow_tracking_uri": {"readonly": True},
         #'notebook_info': {'readonly': True},
-        "private_endpoint_connections": {"readonly": True},
+        # "private_endpoint_connections": {"readonly": True},
         #'private_link_count': {'readonly': True},
         "provisioning_state": {"readonly": True},
         "service_provisioned_resource_group": {"readonly": True},
@@ -99,10 +99,10 @@ class Workspace(Model):
         #'network_acls': {'key': 'properties.networkAcls', 'type': 'NetworkAcls'},
         #'notebook_info': {'key': 'properties.notebookInfo', 'type': 'NotebookResourceInfo'},
         "primary_user_assigned_identity": {"key": "properties.primaryUserAssignedIdentity", "type": "str"},
-        "private_endpoint_connections": {
-            "key": "properties.privateEndpointConnections",
-            "type": "[PrivateEndpointConnection]",
-        },
+        # "private_endpoint_connections": {
+        #    "key": "properties.privateEndpointConnections",
+        #    "type": "[PrivateEndpointConnection]",
+        # },
         "private_link_count": {"key": "properties.privateLinkCount", "type": "int"},
         "provision_network_now": {"key": "properties.provisionNetworkNow", "type": "bool"},
         "provisioning_state": {"key": "properties.provisioningState", "type": "str"},
@@ -207,7 +207,7 @@ class Workspace(Model):
         # self.network_acls = network_acls
         # self.notebook_info = None
         self.primary_user_assigned_identity = primary_user_assigned_identity
-        self.private_endpoint_connections = None
+        # self.private_endpoint_connections = None
         self.private_link_count = None
         self.provision_network_now = provision_network_now
         self.provisioning_state = None

azure/ai/evaluation/_common/constants.py CHANGED Viewed

@@ -5,8 +5,8 @@ from enum import Enum
 from azure.core import CaseInsensitiveEnumMeta
-PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency"]
+PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency", "intent_resolution",
+                                  "tool_call_accurate", "response_completeness", "task_adherence"]
 class CommonConstants:
@@ -39,6 +39,8 @@ class Tasks:
     PROTECTED_MATERIAL = "protected material"
     XPIA = "xpia"
     GROUNDEDNESS = "groundedness"
+    CODE_VULNERABILITY = "code vulnerability"
+    UNGROUNDED_ATTRIBUTES = "inference sensitive attributes"
 class _InternalAnnotationTasks:
@@ -61,6 +63,8 @@ class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     PROTECTED_MATERIAL = "protected_material"
     XPIA = "xpia"
     GROUNDEDNESS = "generic_groundedness"
+    CODE_VULNERABILITY = "code_vulnerability"
+    UNGROUNDED_ATTRIBUTES = "ungrounded_attributes"
 class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):

azure/ai/evaluation/_common/rai_service.py CHANGED Viewed

@@ -42,6 +42,7 @@ USER_TEXT_TEMPLATE_DICT: Dict[str, Template] = {
     "DEFAULT": Template("<Human>{$query}</><System>{$response}</>"),
 }
+INFERENCE_OF_SENSITIVE_ATTRIBUTES = "inference_sensitive_attributes"
 def get_formatted_template(data: dict, annotation_task: str) -> str:
     """Given the task and input data, produce a formatted string that will serve as the main
@@ -64,6 +65,19 @@ def get_formatted_template(data: dict, annotation_task: str) -> str:
             "context": data.get("context", ""),
         }
         return json.dumps(as_dict)
+    if annotation_task == Tasks.CODE_VULNERABILITY:
+        as_dict = {
+            "context": data.get("query", ""),
+            "completion": data.get("response", "")
+        }
+        return json.dumps(as_dict)
+    if annotation_task == Tasks.UNGROUNDED_ATTRIBUTES:
+        as_dict = {
+            "query": data.get("query", ""),
+            "response": data.get("response", ""),
+            "context": data.get("context", "")
+        }
+        return json.dumps(as_dict)
     as_dict = {
         "query": html.escape(data.get("query", "")),
         "response": html.escape(data.get("response", "")),
@@ -160,6 +174,8 @@ def generate_payload(normalized_user_text: str, metric: str, annotation_task: st
     task = annotation_task
     if metric == EvaluationMetrics.PROTECTED_MATERIAL:
         include_metric = False
+    elif metric == EvaluationMetrics.UNGROUNDED_ATTRIBUTES:
+        include_metric = False
     elif metric == _InternalEvaluationMetrics.ECI:
         include_metric = False
     elif metric == EvaluationMetrics.XPIA:
@@ -251,7 +267,6 @@ async def fetch_result(operation_id: str, rai_svc_url: str, credential: TokenCre
         sleep_time = RAIService.SLEEP_TIME**request_count
         await asyncio.sleep(sleep_time)
 def parse_response(  # pylint: disable=too-many-branches,too-many-statements
     batch_response: List[Dict], metric_name: str, metric_display_name: Optional[str] = None
 ) -> Dict[str, Union[str, float]]:
@@ -274,10 +289,16 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
         EvaluationMetrics.PROTECTED_MATERIAL,
         _InternalEvaluationMetrics.ECI,
         EvaluationMetrics.XPIA,
+        EvaluationMetrics.CODE_VULNERABILITY,
+        EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
     }:
         result = {}
         if not batch_response or len(batch_response[0]) == 0:
             return {}
+        if metric_name == EvaluationMetrics.UNGROUNDED_ATTRIBUTES and INFERENCE_OF_SENSITIVE_ATTRIBUTES in batch_response[0]:
+            batch_response[0] = {
+                EvaluationMetrics.UNGROUNDED_ATTRIBUTES: batch_response[0][INFERENCE_OF_SENSITIVE_ATTRIBUTES]
+            }
         if metric_name == EvaluationMetrics.PROTECTED_MATERIAL and metric_name not in batch_response[0]:
             pm_metric_names = {"artwork", "fictional_characters", "logos_and_brands"}
             for pm_metric_name in pm_metric_names:
@@ -313,6 +334,13 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
             result[metric_display_name + "_information_gathering"] = (
                 parsed_response["information_gathering"] if "information_gathering" in parsed_response else math.nan
             )
+        if metric_name == EvaluationMetrics.CODE_VULNERABILITY or metric_name == EvaluationMetrics.UNGROUNDED_ATTRIBUTES:
+            # Add all attributes under the details.
+            details = {}
+            for key, value in parsed_response.items():
+                if key not in {"label", "reasoning", "version"}:
+                    details[key.replace("-", "_")] = value
+            result[metric_display_name + "_details"] = details
         return result
     return _parse_content_harm_response(batch_response, metric_name, metric_display_name)
@@ -359,7 +387,14 @@ def _parse_content_harm_response(
         # get content harm metric_value
         if "label" in harm_response:
-            metric_value = float(harm_response["label"])
+            try:
+                # Handle "n/a" or other non-numeric values
+                if isinstance(harm_response["label"], str) and harm_response["label"].strip().lower() == "n/a":
+                    metric_value = math.nan
+                else:
+                    metric_value = float(harm_response["label"])
+            except (ValueError, TypeError):
+                metric_value = math.nan
         elif "valid" in harm_response:
             metric_value = 0 if harm_response["valid"] else math.nan
         else:
@@ -390,8 +425,7 @@ def _parse_content_harm_response(
         reason = ""
     harm_score = metric_value
-    if metric_value == "n/a":
-        return result
+    # We've already handled the "n/a" case by converting to math.nan
     if not math.isnan(metric_value):
         # int(math.nan) causes a value error, and math.nan is already handled
         # by get_harm_severity_level

azure/ai/evaluation/_common/raiclient/__init__.py ADDED Viewed

@@ -0,0 +1,34 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+from ._client import MachineLearningServicesClient  # type: ignore
+from ._version import VERSION
+__version__ = VERSION
+try:
+    from ._patch import __all__ as _patch_all
+    from ._patch import *
+except ImportError:
+    _patch_all = []
+from ._patch import patch_sdk as _patch_sdk
+# Export GeneratedRAIClient as alias of MachineLearningServicesClient for backward compatibility
+__all__ = [
+    "MachineLearningServicesClient",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+_patch_sdk()

azure/ai/evaluation/_common/raiclient/_client.py ADDED Viewed

@@ -0,0 +1,128 @@
+# pylint: disable=line-too-long,useless-suppression
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+from copy import deepcopy
+from typing import Any, TYPE_CHECKING
+from typing_extensions import Self
+from azure.core import PipelineClient
+from azure.core.pipeline import policies
+from azure.core.rest import HttpRequest, HttpResponse
+from ._configuration import MachineLearningServicesClientConfiguration
+from ._serialization import Deserializer, Serializer
+from .operations import RAISvcOperations
+if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+class MachineLearningServicesClient:
+    """MachineLearningServicesClient.
+    :ivar rai_svc: RAISvcOperations operations
+    :vartype rai_svc: raiclient.operations.RAISvcOperations
+    :param endpoint: Supported Azure-AI endpoints. Required.
+    :type endpoint: str
+    :param subscription_id: The ID of the target subscription. Required.
+    :type subscription_id: str
+    :param resource_group_name: The name of the Resource Group. Required.
+    :type resource_group_name: str
+    :param workspace_name: The name of the AzureML workspace or AI project. Required.
+    :type workspace_name: str
+    :param credential: Credential used to authenticate requests to the service. Required.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2022-11-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+    def __init__(
+        self,
+        endpoint: str,
+        subscription_id: str,
+        resource_group_name: str,
+        workspace_name: str,
+        credential: "TokenCredential",
+        **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}/raisvc/v1.0/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}"
+        self._config = MachineLearningServicesClientConfiguration(
+            endpoint=endpoint,
+            subscription_id=subscription_id,
+            resource_group_name=resource_group_name,
+            workspace_name=workspace_name,
+            credential=credential,
+            **kwargs
+        )
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: PipelineClient = PipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+        self.rai_svc = RAISvcOperations(self._client, self._config, self._serialize, self._deserialize)
+    def send_request(self, request: HttpRequest, *, stream: bool = False, **kwargs: Any) -> HttpResponse:
+        """Runs the network request through the client's chained policies.
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        <HttpRequest [GET], url: 'https://www.example.org/'>
+        >>> response = client.send_request(request)
+        <HttpResponse: 200 OK>
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.HttpResponse
+        """
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+            "subscriptionId": self._serialize.url("self._config.subscription_id", self._config.subscription_id, "str"),
+            "resourceGroupName": self._serialize.url(
+                "self._config.resource_group_name", self._config.resource_group_name, "str"
+            ),
+            "workspaceName": self._serialize.url("self._config.workspace_name", self._config.workspace_name, "str"),
+        }
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+    def close(self) -> None:
+        self._client.close()
+    def __enter__(self) -> Self:
+        self._client.__enter__()
+        return self
+    def __exit__(self, *exc_details: Any) -> None:
+        self._client.__exit__(*exc_details)

azure/ai/evaluation/_common/raiclient/_configuration.py ADDED Viewed

@@ -0,0 +1,87 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+from typing import Any, TYPE_CHECKING
+from azure.core.pipeline import policies
+from ._version import VERSION
+if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+class MachineLearningServicesClientConfiguration:  # pylint: disable=too-many-instance-attributes,name-too-long
+    """Configuration for MachineLearningServicesClient.
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+    :param endpoint: Supported Azure-AI endpoints. Required.
+    :type endpoint: str
+    :param subscription_id: The ID of the target subscription. Required.
+    :type subscription_id: str
+    :param resource_group_name: The name of the Resource Group. Required.
+    :type resource_group_name: str
+    :param workspace_name: The name of the AzureML workspace or AI project. Required.
+    :type workspace_name: str
+    :param credential: Credential used to authenticate requests to the service. Required.
+    :type credential: ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2022-11-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+    def __init__(
+        self,
+        endpoint: str,
+        subscription_id: str,
+        resource_group_name: str,
+        workspace_name: str,
+        credential: "TokenCredential",
+        **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2022-11-01-preview")
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if subscription_id is None:
+            raise ValueError("Parameter 'subscription_id' must not be None.")
+        if resource_group_name is None:
+            raise ValueError("Parameter 'resource_group_name' must not be None.")
+        if workspace_name is None:
+            raise ValueError("Parameter 'workspace_name' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+        self.endpoint = endpoint
+        self.subscription_id = subscription_id
+        self.resource_group_name = resource_group_name
+        self.workspace_name = workspace_name
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "rai_client/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.RedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.RetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = policies.BearerTokenCredentialPolicy(
+                self.credential, *self.credential_scopes, **kwargs
+            )

azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl