PyPI - azure-ai-evaluation - Versions diffs - 0.0.0b0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl - Mend

azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (100) hide show

azure/ai/evaluation/simulator/_model_tools/_identity_manager.py ADDED Viewed

@@ -0,0 +1,147 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import asyncio
+import logging
+import os
+import time
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Dict, Optional, Union
+from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
+# Maximum age of a cached token before ManagedIdentityAPITokenManager.get_token fetches a new one.
+AZURE_TOKEN_REFRESH_INTERVAL = 600  # seconds
+class TokenScope(Enum):
+    """Token scopes for Azure endpoints.
+    Each member's value is the scope string handed to the credential's ``get_token`` call.
+    """
+    # Scope for the Azure management endpoint (management.azure.com).
+    DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
+class APITokenManager(ABC):
+    """Abstract base for objects that hand out API tokens.
+    Concrete subclasses supply the actual retrieval logic by implementing ``get_token``.
+    :param logger: Logger used to report which credential type was selected
+    :type logger: logging.Logger
+    :param auth_header: Authorization header prefix. Defaults to "Bearer"
+    :type auth_header: str
+    :param credential: Azure credential object. When omitted, one is built by ``get_aad_credential``
+    :type credential: Optional[Union[azure.identity.DefaultAzureCredential, azure.identity.ManagedIdentityCredential]]
+    """
+    def __init__(
+        self,
+        logger: logging.Logger,
+        auth_header: str = "Bearer",
+        credential: Optional[Union[DefaultAzureCredential, ManagedIdentityCredential]] = None,
+    ) -> None:
+        # The logger has to be in place first: get_aad_credential() below logs through it.
+        self.logger = logger
+        self.auth_header = auth_header
+        # Created lazily by the ``lock`` property.
+        self._lock = None
+        self.credential = self.get_aad_credential() if credential is None else credential
+        self.token = None
+        self.last_refresh_time = None
+    @property
+    def lock(self) -> asyncio.Lock:
+        """Lock guarding concurrent access to the token, created on first use.
+        :return: Lock object
+        :rtype: asyncio.Lock
+        """
+        existing = self._lock
+        if existing is not None:
+            return existing
+        self._lock = asyncio.Lock()
+        return self._lock
+    def get_aad_credential(self) -> Union[DefaultAzureCredential, ManagedIdentityCredential]:
+        """Build the AAD credential object from the environment.
+        A ManagedIdentityCredential bound to the client ID in the environment variable
+        DEFAULT_IDENTITY_CLIENT_ID is returned when that variable is set; a DefaultAzureCredential
+        is returned otherwise. The choice is logged at INFO level.
+        :return: The AAD credential object
+        :rtype: Union[DefaultAzureCredential, ManagedIdentityCredential]
+        """
+        identity_client_id = os.environ.get("DEFAULT_IDENTITY_CLIENT_ID")
+        if identity_client_id is None:
+            self.logger.info("Environment variable DEFAULT_IDENTITY_CLIENT_ID is not set, using DefaultAzureCredential")
+            return DefaultAzureCredential()
+        self.logger.info(f"Using DEFAULT_IDENTITY_CLIENT_ID: {identity_client_id}")
+        return ManagedIdentityCredential(client_id=identity_client_id)
+    @abstractmethod
+    async def get_token(self) -> str:
+        """Return the API token. Must be implemented by subclasses.
+        :return: API token
+        :rtype: str
+        """
+class ManagedIdentityAPITokenManager(APITokenManager):
+    """API Token Manager for Azure Managed Identity
+    The token is cached on the instance and fetched again from the credential once it is older
+    than AZURE_TOKEN_REFRESH_INTERVAL seconds.
+    :param token_scope: Token scope for Azure endpoint
+    :type token_scope: ~azure.ai.evaluation.simulator._model_tools.TokenScope
+    :param logger: Logger object
+    :type logger: logging.Logger
+    :keyword kwargs: Additional keyword arguments, forwarded to APITokenManager
+    :paramtype kwargs: Dict
+    """
+    def __init__(self, token_scope: TokenScope, logger: logging.Logger, **kwargs: Dict):
+        super().__init__(logger, **kwargs)
+        self.token_scope = token_scope
+    # Bug 3353724: This get_token is sync method, but it is defined as async method in the base class
+    def get_token(self) -> str:  # pylint: disable=invalid-overridden-method
+        """Get the API token. If the token is not available or has expired, refresh the token.
+        :return: API token
+        :rtype: str
+        """
+        now = time.time()
+        needs_refresh = (
+            self.token is None
+            or self.last_refresh_time is None
+            or now - self.last_refresh_time > AZURE_TOKEN_REFRESH_INTERVAL
+        )
+        if needs_refresh:
+            # Fetch first and record the refresh time only afterwards: if the credential call raises,
+            # the timestamp stays untouched, so the next call retries instead of serving a stale
+            # cached token as if it had just been refreshed.
+            self.token = self.credential.get_token(self.token_scope.value).token
+            self.last_refresh_time = now
+            self.logger.info("Refreshed Azure endpoint token.")
+        return self.token
+class PlainTokenManager(APITokenManager):
+    """Token manager that serves a fixed, caller-supplied key.
+    :param openapi_key: OpenAPI key, returned unchanged by ``get_token``
+    :type openapi_key: str
+    :param logger: Logger object
+    :type logger: logging.Logger
+    :keyword kwargs: Optional keyword arguments, forwarded to APITokenManager
+    :paramtype kwargs: Dict
+    """
+    def __init__(self, openapi_key: str, logger: logging.Logger, **kwargs: Dict):
+        # The base initializer resets ``token`` to None, so the key is stored only afterwards.
+        super().__init__(logger, **kwargs)
+        self.token = openapi_key
+    async def get_token(self) -> str:
+        """Return the key given at construction time.
+        :return: API token
+        :rtype: str
+        """
+        return self.token

azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py ADDED Viewed

@@ -0,0 +1,228 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import asyncio
+import copy
+import json
+import time
+import uuid
+from typing import Dict, List
+from azure.core.exceptions import HttpResponseError
+from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
+from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
+from azure.ai.evaluation._user_agent import USER_AGENT
+from .models import OpenAIChatCompletionsModel
+class SimulationRequestDTO:
+    """Simulation Request Data Transfer Object
+    :param url: The URL to send the request to.
+    :type url: str
+    :param headers: The headers to send with the request.
+    :type headers: Dict[str, str]
+    :param payload: The payload to send with the request. It is stored JSON-encoded in ``self.json``.
+    :type payload: Dict[str, Any]
+    :param params: The parameters to send with the request.
+    :type params: Dict[str, str]
+    :param templatekey: The template key to use for the request.
+    :type templatekey: str
+    :param template_parameters: The template parameters to use for the request. May be None.
+    :type template_parameters: Optional[Dict]
+    """
+    def __init__(self, url, headers, payload, params, templatekey, template_parameters):
+        self.url = url
+        self.headers = headers
+        self.json = json.dumps(payload)
+        self.params = params
+        self.templatekey = templatekey
+        self.templateParameters = template_parameters
+    def to_dict(self) -> Dict:
+        """Convert the DTO to a dictionary.
+        Template parameter keys and values are converted to strings in place. The returned
+        dictionary is the instance's own attribute dictionary, not a copy.
+        :return: The DTO as a dictionary.
+        :rtype: Dict
+        """
+        if self.templateParameters is not None:
+            self.templateParameters = {str(k): str(v) for k, v in self.templateParameters.items()}
+        return self.__dict__
+    def to_json(self) -> str:
+        """Convert the DTO to a JSON string.
+        Serializes the same normalized form that ``to_dict`` returns, so the output does not depend
+        on whether ``to_dict`` was called before, and template parameter values that are not JSON
+        serializable no longer raise.
+        :return: The DTO as a JSON string.
+        :rtype: str
+        """
+        return json.dumps(self.to_dict())
+class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
+    """A chat completion model that uses a proxy to query the model with a body of data.
+    A request is submitted to the proxy endpoint, which must answer HTTP 202 with a JSON body holding
+    a ``location``; that location is then fetched to obtain the completion.
+    :param name: The name of the model.
+    :type name: str
+    :param template_key: The template key to use for the request.
+    :type template_key: str
+    :param template_parameters: The template parameters to use for the request.
+    :type template_parameters: Dict
+    :keyword args: Additional arguments to pass to the parent class.
+    :keyword kwargs: Additional keyword arguments to pass to the parent class.
+    """
+    def __init__(self, name: str, template_key: str, template_parameters, *args, **kwargs) -> None:
+        self.tkey = template_key
+        self.tparam = template_parameters
+        # Location of the most recently submitted request's result; set by request_api.
+        self.result_url = None
+        super().__init__(name=name, *args, **kwargs)
+    def format_request_data(self, messages: List[Dict], **request_params) -> Dict:  # type: ignore[override]
+        """Format the request data to query the model with.
+        The result holds the messages plus the model parameters from ``get_model_params``;
+        ``request_params`` are applied last and therefore override both.
+        :param messages: List of messages to query the model with.
+            Expected format: [{"role": "user", "content": "Hello!"}, ...]
+        :type messages: List[Dict]
+        :keyword request_params: Additional parameters to pass to the model.
+        :paramtype request_params: Dict
+        :return: The formatted request data.
+        :rtype: Dict
+        """
+        request_data = {"messages": messages, **self.get_model_params()}
+        request_data.update(request_params)
+        return request_data
+    async def get_conversation_completion(
+        self,
+        messages: List[Dict],
+        session: AsyncHttpPipeline,
+        role: str = "assistant",  # pylint: disable=unused-argument
+        **request_params,
+    ) -> dict:
+        """
+        Query the model a single time with a message.
+        :param messages: List of messages to query the model with.
+            Expected format: [{"role": "user", "content": "Hello!"}, ...]
+        :type messages: List[Dict]
+        :param session: AsyncHttpPipeline object to query the model with.
+        :type session: ~azure.ai.evaluation._http_utils.AsyncHttpPipeline
+        :param role: The role of the user sending the message. This parameter is not used in this method;
+            however, it must be included to match the method signature of the parent class. Defaults to "assistant".
+        :type role: str
+        :keyword request_params: Additional parameters to pass to the model.
+        :paramtype request_params: Dict
+        :return: A dictionary representing the completion of the conversation query.
+        :rtype: Dict
+        """
+        request_data = self.format_request_data(
+            messages=messages,
+            **request_params,
+        )
+        return await self.request_api(
+            session=session,
+            request_data=request_data,
+        )
+    async def request_api(
+        self,
+        session: AsyncHttpPipeline,
+        request_data: dict,
+    ) -> dict:
+        """
+        Request the model with a body of data.
+        :param session: HTTPS Session for invoking the endpoint.
+        :type session: AsyncHttpPipeline
+        :param request_data: Prompt dictionary to query the model with. (Pass {"prompt": prompt} instead of prompt.)
+        :type request_data: Dict[str, Any]
+        :raises ~azure.core.exceptions.HttpResponseError: If the submit call does not answer with HTTP 202.
+        :return: A dictionary with the keys "request", "response" (parsed), "time_taken" (seconds) and
+            "full_response" (shallow copy of the raw response body).
+        :rtype: Dict[str, Any]
+        """
+        self._log_request(request_data)
+        token = self.token_manager.get_token()
+        # APITokenManager declares get_token as async, while ManagedIdentityAPITokenManager overrides it
+        # with a sync method. Accept both: await the result only when a coroutine came back, so the
+        # Authorization header never ends up holding the repr of an un-awaited coroutine.
+        if asyncio.iscoroutine(token):
+            token = await token
+        # Headers for the call to the proxy itself.
+        proxy_headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+            "User-Agent": USER_AGENT,
+        }
+        # Headers carried inside the request DTO.
+        headers = {
+            "Content-Type": "application/json",
+            "X-CV": f"{uuid.uuid4()}",
+            "X-ModelType": self.model or "",
+        }
+        # add all additional headers
+        headers.update(self.additional_headers)  # type: ignore[arg-type]
+        params = {}
+        if self.api_version:
+            params["api-version"] = self.api_version
+        sim_request_dto = SimulationRequestDTO(
+            url=self.endpoint_url,
+            headers=headers,
+            payload=request_data,
+            params=params,
+            templatekey=self.tkey,
+            template_parameters=self.tparam,
+        )
+        time_start = time.time()
+        response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
+        if response.status_code != 202:
+            raise HttpResponseError(
+                message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
+            )
+        response = response.json()
+        self.result_url = response["location"]
+        retry_policy = AsyncRetryPolicy(  # set up retry configuration
+            retry_on_status_codes=[202],  # on which statuses to retry
+            retry_total=7,
+            retry_backoff_factor=10.0,
+            retry_backoff_max=180,
+            retry_mode=RetryMode.Exponential,
+        )
+        # initial 15 seconds wait before attempting to fetch result
+        # Need to wait both in this thread and in the async thread for some reason?
+        # Someone not under a crunch and with better async understandings should dig into this more.
+        # NOTE(review): time.sleep() blocks the whole event loop for 15 seconds, stalling every other
+        # coroutine; kept as-is because the comment above marks it as deliberate.
+        await asyncio.sleep(15)
+        time.sleep(15)
+        async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
+            response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+                self.result_url, headers=proxy_headers
+            )
+        response.raise_for_status()
+        response_data = response.json()
+        self.logger.info("Response: %s", response_data)
+        # Copy the full response and return it to be saved in jsonl.
+        full_response = copy.copy(response_data)
+        time_taken = time.time() - time_start
+        # pylint: disable=unexpected-keyword-arg
+        parsed_response = self._parse_response(response_data, request_data=request_data)  # type: ignore[call-arg]
+        return {
+            "request": request_data,
+            "response": parsed_response,
+            "time_taken": time_taken,
+            "full_response": full_response,
+        }

azure/ai/evaluation/simulator/_model_tools/_rai_client.py ADDED Viewed

@@ -0,0 +1,157 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+from typing import Any, Dict
+from urllib.parse import urljoin, urlparse
+from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
+from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
+from azure.ai.evaluation._user_agent import USER_AGENT
+from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+from azure.ai.evaluation._model_configurations import AzureAIProject
+from ._identity_manager import APITokenManager
+# Optional override of the service endpoint, read once at import time from RAI_SVC_URL.
+# Stays None when the variable is not set; trailing slashes are stripped otherwise.
+api_url = os.environ.get("RAI_SVC_URL")
+if api_url is not None:
+    api_url = api_url.rstrip("/")
+    print(f"Found RAI_SVC_URL in environment variable, using {api_url} for the service endpoint.")
+class RAIClient:
+    """Client for the Responsible AI Service
+    The service host is taken from the module-level ``api_url`` (environment variable RAI_SVC_URL)
+    when that is set, and is otherwise discovered through the Azure management API.
+    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
+        name.
+    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param token_manager: The token manager
+    :type token_manager: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
+    """
+    def __init__(self, azure_ai_project: AzureAIProject, token_manager: APITokenManager) -> None:
+        self.azure_ai_project = azure_ai_project
+        self.token_manager = token_manager
+        # Caches filled lazily by get_contentharm_parameters / get_jailbreaks_dataset.
+        self.contentharm_parameters = None
+        self.jailbreaks_dataset = None
+        if api_url is not None:
+            host = api_url
+        else:
+            host = self._get_service_discovery_url()
+        segments = [
+            host.rstrip("/"),
+            "raisvc/v1.0/subscriptions",
+            self.azure_ai_project["subscription_id"],
+            "resourceGroups",
+            self.azure_ai_project["resource_group_name"],
+            "providers/Microsoft.MachineLearningServices/workspaces",
+            self.azure_ai_project["project_name"],
+        ]
+        self.api_url = "/".join(segments)
+        # add a "/" at the end of the url
+        # (without it, urljoin below would replace the last path segment instead of appending to it)
+        self.api_url = self.api_url.rstrip("/") + "/"
+        self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
+        self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
+        self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
+        self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")
+    def _get_service_discovery_url(self):
+        """Look up the workspace through the Azure management API and return its discovery host.
+        :raises EvaluationException: If the management API does not answer with HTTP 200
+        :return: ``scheme://netloc`` of the workspace's ``properties.discoveryUrl``
+        :rtype: str
+        """
+        # NOTE(review): this method is synchronous, so a token manager whose get_token is a coroutine
+        # function would put an un-awaited coroutine into the header here.
+        bearer_token = self.token_manager.get_token()
+        headers = {"Authorization": f"Bearer {bearer_token}", "Content-Type": "application/json"}
+        http_client = get_http_client()
+        response = http_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+            f"https://management.azure.com/subscriptions/{self.azure_ai_project['subscription_id']}/"
+            f"resourceGroups/{self.azure_ai_project['resource_group_name']}/"
+            f"providers/Microsoft.MachineLearningServices/workspaces/{self.azure_ai_project['project_name']}?"
+            f"api-version=2023-08-01-preview",
+            headers=headers,
+            timeout=5,
+        )
+        if response.status_code != 200:
+            msg = "Failed to retrieve the discovery service URL."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.RAI_CLIENT,
+                category=ErrorCategory.SERVICE_UNAVAILABLE,
+                blame=ErrorBlame.UNKNOWN,
+            )
+        base_url = urlparse(response.json()["properties"]["discoveryUrl"])
+        return f"{base_url.scheme}://{base_url.netloc}"
+    def _create_async_client(self) -> AsyncHttpPipeline:
+        """Create an async http client with retry mechanism
+        The retry policy allows 6 retries in fixed mode with a backoff factor of 5.
+        :return: The async http client
+        :rtype: ~azure.ai.evaluation._http_utils.AsyncHttpPipeline
+        """
+        return get_async_http_client().with_policies(
+            retry_policy=AsyncRetryPolicy(retry_total=6, retry_backoff_factor=5, retry_mode=RetryMode.Fixed)
+        )
+    async def get_contentharm_parameters(self) -> Any:
+        """Get the content harm parameters, fetching them on first use and caching the result.
+        :return: The response of the template parameters endpoint
+        :rtype: Any
+        """
+        if self.contentharm_parameters is None:
+            self.contentharm_parameters = await self.get(self.parameter_json_endpoint)
+        return self.contentharm_parameters
+    async def get_jailbreaks_dataset(self, type: str) -> Any:
+        """Get the jailbreaks dataset, fetching it on first use and caching the result.
+        :param type: The jailbreak type, either "xpia" or "upia"
+        :type type: str
+        :raises EvaluationException: If nothing is cached yet and ``type`` is not a supported value
+        :return: The jailbreaks dataset
+        :rtype: Any
+        """
+        # NOTE(review): the cache is not keyed by ``type``; once a dataset is loaded, it is returned
+        # for every later call regardless of the type requested. Confirm this is intended.
+        if self.jailbreaks_dataset is None:
+            if type == "xpia":
+                self.jailbreaks_dataset = await self.get(self.xpia_jailbreaks_json_endpoint)
+            elif type == "upia":
+                self.jailbreaks_dataset = await self.get(self.jailbreaks_json_endpoint)
+            else:
+                msg = f"Invalid jailbreak type: {type}. Supported types: ['xpia', 'upia']"
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    target=ErrorTarget.ADVERSARIAL_SIMULATOR,
+                    category=ErrorCategory.INVALID_VALUE,
+                    blame=ErrorBlame.USER_ERROR,
+                )
+        return self.jailbreaks_dataset
+    async def get(self, url: str) -> Any:
+        """Make a GET request to the given url
+        :param url: The url
+        :type url: str
+        :raises EvaluationException: If the response status is not 200, which is reported as the Azure
+            safety evaluation service not being available in the current region
+        :return: The decoded JSON body of the response
+        :rtype: Any
+        """
+        import inspect  # local import: only needed here to detect an awaitable token
+        token = self.token_manager.get_token()
+        # APITokenManager declares get_token as async, while ManagedIdentityAPITokenManager overrides it
+        # with a sync method. Accept both, so the header never holds the repr of a coroutine.
+        if inspect.isawaitable(token):
+            token = await token
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+            "User-Agent": USER_AGENT,
+        }
+        session = self._create_async_client()
+        async with session:
+            response = await session.get(url=url, headers=headers)  # pylint: disable=unexpected-keyword-arg
+        if response.status_code == 200:
+            return response.json()
+        # The parentheses are required: without them the second literal is a separate no-op statement
+        # and the message stops after "current region, ".
+        msg = (
+            "Azure safety evaluation service is not available in your current region, "
+            "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
+        )
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.UNKNOWN,
+            blame=ErrorBlame.USER_ERROR,
+        )

azure/ai/evaluation/simulator/_model_tools/_template_handler.py ADDED Viewed

@@ -0,0 +1,157 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from typing import Any, Dict, Optional
+from azure.ai.evaluation._model_configurations import AzureAIProject
+from ._rai_client import RAIClient
+# Template names for which AdversarialTemplateHandler.get_template returns a template.
+CONTENT_HARM_TEMPLATES_COLLECTION_KEY = {
+    "adv_qa",
+    "adv_conversation",
+    "adv_summarization",
+    "adv_search",
+    "adv_rewrite",
+    "adv_content_gen_ungrounded",
+    "adv_content_gen_grounded",
+    "adv_content_protected_material",
+    "adv_politics",
+}
+class ContentHarmTemplatesUtils:
+    """Content harm templates utility functions."""
+    @staticmethod
+    def get_template_category(key: str) -> str:
+        """Parse category from template key
+        :param key: The template key
+        :type key: str
+        :return: The category, i.e. the first "/"-separated segment of the key (with one special case)
+        :rtype: str
+        """
+        # Check for datasets whose names do not align with the normal
+        # naming convention where the first segment of the name is the category.
+        if key == "conversation/public/ip/bing_ip.json":
+            return "content_protected_material"
+        return key.split("/")[0]
+    @staticmethod
+    def get_template_key(key: str) -> str:
+        """Given a template dataset name (which looks like a .json file name) convert it into
+        the corresponding template key (which looks like a .md file name). This allows us to
+        properly link datasets to the LLM that must be used to simulate them.
+        Only a trailing ".json" is removed; a ".json" appearing elsewhere in the key is kept.
+        :param key: The dataset key.
+        :type key: str
+        :return: The template key.
+        :rtype: str
+        """
+        suffix = ".json"
+        # Strip the extension only when it really is the suffix. The previous rsplit(".json")[0] cut
+        # the key at the first ".json" found anywhere, dropping everything after it.
+        filepath = key[: -len(suffix)] if key.endswith(suffix) else key
+        directory, separator, filename = filepath.rpartition("/")
+        return directory + separator + ContentHarmTemplatesUtils.json_name_to_md_name(filename)
+    @staticmethod
+    def json_name_to_md_name(name: str) -> str:
+        """Convert JSON filename to Markdown filename
+        Every "_aml" occurrence is removed from the name and ".md" is appended.
+        :param name: The JSON filename, without its extension
+        :type name: str
+        :return: The Markdown filename
+        :rtype: str
+        """
+        result = name.replace("_aml", "")
+        return result + ".md"
+class AdversarialTemplate:
+    """Plain container describing one adversarial scenario template.
+    :param template_name: The name of the template.
+    :type template_name: str
+    :param text: The template text.
+    :type text: str
+    :param context_key: The context key.
+    :param template_parameters: The template parameters. Defaults to None.
+    """
+    def __init__(self, template_name, text, context_key, template_parameters=None) -> None:
+        self.template_name = template_name
+        self.text = text
+        self.context_key = context_key
+        self.template_parameters = template_parameters
+    def __str__(self):
+        # Always the literal placeholder, independent of the template's own fields.
+        placeholder = "{{ch_template_placeholder}}"
+        return placeholder
+class AdversarialTemplateHandler:
+    """
+    Builds adversarial templates from the content harm parameters served by the RAI client.
+    :param azure_ai_project: The Azure AI project.
+    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param rai_client: The RAI client.
+    :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
+    """
+    def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
+        self.cached_templates_source = {}
+        self.azure_ai_project = azure_ai_project
+        # Filled on the first call to _get_content_harm_template_collections.
+        self.categorized_ch_parameters = None
+        self.rai_client = rai_client
+    async def _get_content_harm_template_collections(self, collection_key):
+        """Return the templates whose category matches the given collection key.
+        The parameters are fetched from the RAI client once and kept, indexed by template key.
+        The category looked for is whatever follows the last "adv_" in ``collection_key``. Every
+        parameter dictionary of a matching entry gets the "ch_template_placeholder" item added in place.
+        :param collection_key: The collection key, e.g. "adv_qa"
+        :type collection_key: str
+        :return: One AdversarialTemplate per matching entry
+        :rtype: List[AdversarialTemplate]
+        """
+        if self.categorized_ch_parameters is None:
+            raw_parameters = await self.rai_client.get_contentharm_parameters()
+            self.categorized_ch_parameters = {
+                ContentHarmTemplatesUtils.get_template_key(dataset_name): {
+                    "parameters": dataset_parameters,
+                    "category": ContentHarmTemplatesUtils.get_template_category(dataset_name),
+                    "parameters_key": dataset_name,
+                }
+                for dataset_name, dataset_parameters in raw_parameters.items()
+            }
+        wanted_category = collection_key.split("adv_")[-1]
+        matching_templates = []
+        for template_key, entry in self.categorized_ch_parameters.items():
+            if entry["category"] != wanted_category:
+                continue
+            parameter_sets = entry["parameters"]
+            for parameter_set in parameter_sets:
+                parameter_set["ch_template_placeholder"] = "{{ch_template_placeholder}}"
+            matching_templates.append(
+                AdversarialTemplate(
+                    template_name=template_key, text=None, context_key=[], template_parameters=parameter_sets
+                )
+            )
+        return matching_templates
+    def get_template(self, template_name: str) -> Optional[AdversarialTemplate]:
+        """Generate content harm template.
+        :param template_name: The name of the template.
+        :type template_name: str
+        :return: An empty template carrying the given name when the name is one of
+            CONTENT_HARM_TEMPLATES_COLLECTION_KEY, otherwise None.
+        :rtype: Optional[~azure.ai.evaluation.simulator._model_tools.AdversarialTemplate]
+        """
+        if template_name not in CONTENT_HARM_TEMPLATES_COLLECTION_KEY:
+            return None
+        return AdversarialTemplate(template_name=template_name, text=None, context_key=[], template_parameters=None)

azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0b1__py3-none-any.whl