PyPI - azure-ai-evaluation - Versions diffs - 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl - Mend

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show

azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py CHANGED Viewed

@@ -6,12 +6,15 @@ import copy
 import json
 import time
 import uuid
-from typing import Any, Dict, List, Optional, cast
+from typing import Any, Dict, List, Optional, cast, Union
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
-from azure.ai.evaluation._user_agent import USER_AGENT
-from azure.core.exceptions import HttpResponseError
+from azure.ai.evaluation._user_agent import UserAgentSingleton
+from azure.core.exceptions import HttpResponseError, ServiceResponseError
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
+from azure.ai.evaluation._common.onedp.models import SimulationDTO
+from azure.ai.evaluation._common.constants import RAIService
 from .._model_tools._template_handler import TemplateParameters
 from .models import OpenAIChatCompletionsModel
@@ -40,15 +43,15 @@ class SimulationRequestDTO:
         headers: Dict[str, str],
         payload: Dict[str, Any],
         params: Dict[str, str],
-        templatekey: str,
-        template_parameters: Optional[TemplateParameters],
+        templateKey: str,
+        templateParameters: Optional[TemplateParameters],
     ):
         self.url = url
         self.headers = headers
         self.json = json.dumps(payload)
         self.params = params
-        self.templatekey = templatekey
-        self.templateParameters = template_parameters
+        self.templateKey = templateKey
+        self.templateParameters = templateParameters
     def to_dict(self) -> Dict:
         """Convert the DTO to a dictionary.
@@ -89,6 +92,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
         self.tkey = template_key
         self.tparam = template_parameters
         self.result_url: Optional[str] = None
+        self.simulation_id: Optional[str] = kwargs.pop("simulation_id", "")
         super().__init__(name=name, **kwargs)
@@ -110,7 +114,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
     async def get_conversation_completion(
         self,
         messages: List[Dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str = "assistant",  # pylint: disable=unused-argument
         **request_params,
     ) -> dict:
@@ -141,7 +145,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
     async def request_api(
         self,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         request_data: dict,
     ) -> dict:
         """
@@ -162,13 +166,14 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
         proxy_headers = {
             "Authorization": f"Bearer {token}",
             "Content-Type": "application/json",
-            "User-Agent": USER_AGENT,
+            "User-Agent": UserAgentSingleton().value,
         }
         headers = {
             "Content-Type": "application/json",
             "X-CV": f"{uuid.uuid4()}",
             "X-ModelType": self.model or "",
+            "x-ms-client-request-id": self.simulation_id,
         }
         # add all additional headers
         headers.update(self.additional_headers)  # type: ignore[arg-type]
@@ -181,51 +186,101 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
             headers=headers,
             payload=request_data,
             params=params,
-            templatekey=self.tkey,
-            template_parameters=self.tparam,
+            templateKey=self.tkey,
+            templateParameters=self.tparam,
         )
         time_start = time.time()
         full_response = None
-        response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
-        if response.status_code != 202:
-            raise HttpResponseError(
-                message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
+        if isinstance(session, AIProjectClient):
+            sim_request_dto = SimulationDTO(
+                headers=headers,
+                params=params,
+                json=json.dumps(request_data),
+                template_key=self.tkey,
+                template_parameters=self.tparam,
+            )
+            response_data = session.red_teams.submit_simulation(sim_request_dto, headers=headers, params=params)
+            operation_id = response_data["location"].split("/")[-1]
+            request_count = 0
+            flag = True
+            while flag:
+                try:
+                    response = session.red_teams.operation_results(operation_id, headers=headers)
+                except Exception as e:
+                    from types import SimpleNamespace  # pylint: disable=forgotten-debug-statement
+                    response = SimpleNamespace(status_code=202, text=str(e), json=lambda: {"error": str(e)})
+                if isinstance(response, dict):
+                    response_data = response
+                    flag = False
+                    break
+                if not isinstance(response, SimpleNamespace) and response.get("object") == "chat.completion":
+                    response_data = response
+                    flag = False
+                    break
+                else:
+                    request_count += 1
+                    sleep_time = RAIService.SLEEP_TIME**request_count
+                    await asyncio.sleep(sleep_time)
+        else:
+            # Retry policy for POST request to RAI service
+            service_call_retry_policy = AsyncRetryPolicy(
+                retry_on_exceptions=[ServiceResponseError],
+                retry_total=7,
+                retry_backoff_factor=10.0,
+                retry_backoff_max=180,
+                retry_mode=RetryMode.Exponential,
             )
-        response_data = response.json()
-        self.result_url = cast(str, response_data["location"])
-        retry_policy = AsyncRetryPolicy(  # set up retry configuration
-            retry_on_status_codes=[202],  # on which statuses to retry
-            retry_total=7,
-            retry_backoff_factor=10.0,
-            retry_backoff_max=180,
-            retry_mode=RetryMode.Exponential,
-        )
-        # initial 15 seconds wait before attempting to fetch result
-        # Need to wait both in this thread and in the async thread for some reason?
-        # Someone not under a crunch and with better async understandings should dig into this more.
-        await asyncio.sleep(15)
-        time.sleep(15)
-        async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
-            token = await self.token_manager.get_token_async()
-            proxy_headers = {
-                "Authorization": f"Bearer {token}",
-                "Content-Type": "application/json",
-                "User-Agent": USER_AGENT,
-            }
-            response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-                self.result_url, headers=proxy_headers
+            response = None
+            async with get_async_http_client().with_policies(retry_policy=service_call_retry_policy) as retry_client:
+                try:
+                    response = await retry_client.post(
+                        url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict()
+                    )
+                except ServiceResponseError as e:
+                    self.logger.error("ServiceResponseError during POST request to rai svc after retries: %s", str(e))
+                    raise
+            # response.raise_for_status()
+            if response.status_code != 202:
+                raise HttpResponseError(
+                    message=f"Received unexpected HTTP status: {response.status_code} {response.text()}",
+                    response=response,
+                )
+            response_data = response.json()
+            self.result_url = cast(str, response_data["location"])
+            retry_policy = AsyncRetryPolicy(  # set up retry configuration
+                retry_on_status_codes=[202],  # on which statuses to retry
+                retry_total=7,
+                retry_backoff_factor=10.0,
+                retry_backoff_max=180,
+                retry_mode=RetryMode.Exponential,
             )
-        response.raise_for_status()
+            # initial 15 seconds wait before attempting to fetch result
+            # Need to wait both in this thread and in the async thread for some reason?
+            # Someone not under a crunch and with better async understandings should dig into this more.
+            await asyncio.sleep(15)
+            time.sleep(15)
+            async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
+                token = await self.token_manager.get_token_async()
+                proxy_headers = {
+                    "Authorization": f"Bearer {token}",
+                    "Content-Type": "application/json",
+                    "User-Agent": UserAgentSingleton().value,
+                }
+                response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+                    self.result_url, headers=proxy_headers
+                )
+            response.raise_for_status()
+            response_data = response.json()
-        response_data = response.json()
         self.logger.info("Response: %s", response_data)
         # Copy the full response and return it to be saved in jsonl.

azure/ai/evaluation/simulator/_model_tools/_rai_client.py CHANGED Viewed

@@ -2,13 +2,15 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import os
-from typing import Any
+from typing import Any, Dict, List
 from urllib.parse import urljoin, urlparse
+import base64
+import json
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
-from azure.ai.evaluation._user_agent import USER_AGENT
+from azure.ai.evaluation._user_agent import UserAgentSingleton
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
 from ._identity_manager import APITokenManager
@@ -57,9 +59,11 @@ class RAIClient:  # pylint: disable=client-accepts-api-version-keyword
         # add a "/" at the end of the url
         self.api_url = self.api_url.rstrip("/") + "/"
         self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
+        self.parameter_image_endpoint = urljoin(self.api_url, "simulation/template/parameters/image")
         self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
         self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
         self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")
+        self.attack_objectives_endpoint = urljoin(self.api_url, "simulation/attackobjectives")
     def _get_service_discovery_url(self):
         bearer_token = self.token_manager.get_token()
@@ -144,7 +148,7 @@ class RAIClient:  # pylint: disable=client-accepts-api-version-keyword
         headers = {
             "Authorization": f"Bearer {token}",
             "Content-Type": "application/json",
-            "User-Agent": USER_AGENT,
+            "User-Agent": UserAgentSingleton().value,
         }
         session = self._create_async_client()
@@ -166,3 +170,97 @@ class RAIClient:  # pylint: disable=client-accepts-api-version-keyword
             category=ErrorCategory.UNKNOWN,
             blame=ErrorBlame.USER_ERROR,
         )
+    async def get_image_data(self, path: str) -> Any:
+        """Make a GET Image request to the given url
+        :param path: The url of the image
+        :type path: str
+        :raises EvaluationException: If the Azure safety evaluation service is not available in the current region
+        :return: The response
+        :rtype: Any
+        """
+        token = self.token_manager.get_token()
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+            "User-Agent": UserAgentSingleton().value,
+        }
+        session = self._create_async_client()
+        params = {"path": path}
+        async with session:
+            response = await session.get(
+                url=self.parameter_image_endpoint, params=params, headers=headers
+            )  # pylint: disable=unexpected-keyword-arg
+        if response.status_code == 200:
+            return base64.b64encode(response.content).decode("utf-8")
+        msg = (
+            "Azure safety evaluation service is not available in your current region, "
+            + "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
+        )
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.RAI_CLIENT,
+            category=ErrorCategory.UNKNOWN,
+            blame=ErrorBlame.USER_ERROR,
+        )
+    async def get_attack_objectives(
+        self, risk_categories: List[str], application_scenario: str = None, strategy: str = None
+    ) -> Any:
+        """Get the attack objectives based on risk categories and application scenario
+        :param risk_categories: List of risk categories to generate attack objectives for
+        :type risk_categories: List[str]
+        :param application_scenario: Optional description of the application scenario for context
+        :type application_scenario: str
+        :param strategy: Optional attack strategy to get specific objectives for
+        :type strategy: str
+        :return: The attack objectives
+        :rtype: Any
+        """
+        # Create query parameters for the request
+        if application_scenario:
+            raise NotImplementedError("Application scenario is not supported yet")
+        params = {
+            "api-version": "2022-11-01-preview",
+            "riskTypes": ",".join(risk_categories),
+            "lang": "en",  # Default to English
+        }
+        # Add strategy parameter if provided
+        if strategy:
+            params["strategy"] = strategy
+        try:
+            # Make the request using the existing get method
+            result = await self.get(self.attack_objectives_endpoint)
+            # from collections import defaultdict
+            # counts_by_risk = defaultdict(int)
+            # for item in result:
+            #     target_harms = item.get("Metadata", {}).get("TargetHarms", [])
+            #     if not target_harms:
+            #         # No risk type specified
+            #         counts_by_risk["empty"] += 1
+            #     else:
+            #         for harm in target_harms:
+            #             # Use "empty" if the risk type field is missing
+            #             risk_type = harm.get("RiskType", "") or "empty"
+            #             counts_by_risk[risk_type] += 1
+            return result
+        except Exception:
+            # If the API fails or isn't implemented yet, return a mock response
+            # This is temporary until the API endpoint is fully implemented
+            return [
+                {
+                    "metadata": {"lang": "en", "target_harms": [{"risk-type": "violence", "risk-subtype": ""}]},
+                    "messages": [{"role": "user", "content": "Risky content"}],
+                    "modality": "text",
+                    "source": ["source"],
+                }
+            ]

azure/ai/evaluation/simulator/_model_tools/_template_handler.py CHANGED Viewed

@@ -2,11 +2,13 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Dict, List, Optional, TypedDict, cast
+from typing import Dict, List, Optional, TypedDict, cast, Union
+from ast import literal_eval
 from typing_extensions import NotRequired
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
+from azure.ai.evaluation.simulator._adversarial_scenario import AdversarialScenario
 from ._rai_client import RAIClient
@@ -56,6 +58,7 @@ class TemplateParameters(TypedDict):
     category: NotRequired[str]
     target_population: NotRequired[str]
     topic: NotRequired[str]
+    jailbreak_string: NotRequired[str]
 class _CategorizedParameter(TypedDict):
@@ -144,15 +147,18 @@ class AdversarialTemplate:
 class AdversarialTemplateHandler:
     """
-    Adversarial template handler constructor.
+    Initialize the AdversarialTemplateHandler.
-    :param azure_ai_project: The Azure AI project.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param rai_client: The RAI client.
-    :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
+    :param rai_client: The RAI client or AI Project client used for fetching parameters.
+    :type rai_client: Union[~azure.ai.evaluation.simulator._model_tools.RAIClient, ~azure.ai.evaluation._common.onedp._client.AIProjectClient]
     """
-    def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
+    def __init__(
+        self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]
+    ) -> None:
         self.azure_ai_project = azure_ai_project
         self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None
         self.rai_client = rai_client
@@ -161,8 +167,10 @@ class AdversarialTemplateHandler:
         if self.categorized_ch_parameters is None:
             categorized_parameters: Dict[str, _CategorizedParameter] = {}
             util = ContentHarmTemplatesUtils
-            parameters = await self.rai_client.get_contentharm_parameters()
+            if isinstance(self.rai_client, RAIClient):
+                parameters = await self.rai_client.get_contentharm_parameters()
+            elif isinstance(self.rai_client, AIProjectClient):
+                parameters = literal_eval(self.rai_client.red_teams.get_template_parameters())
             for k in parameters.keys():
                 template_key = util.get_template_key(k)
@@ -175,17 +183,29 @@ class AdversarialTemplateHandler:
         template_category = collection_key.split("adv_")[-1]
+        # Handle both qa_enterprise and qa_documents mapping to qa
+        if template_category in ["qa_enterprise", "qa_documents"]:
+            template_category = "qa"
         plist = self.categorized_ch_parameters
         ch_templates = []
         for key, value in plist.items():
+            # Skip enterprise templates for ADVERSARIAL_QA
+            if collection_key == AdversarialScenario.ADVERSARIAL_QA.value and "enterprise" in key:
+                continue
+            # Skip non-enterprise templates for ADVERSARIAL_QA_DOCUMENTS
+            if collection_key == AdversarialScenario.ADVERSARIAL_QA_DOCUMENTS.value and "enterprise" not in key:
+                continue
             if value["category"] == template_category:
                 params = value["parameters"]
                 for p in params:
                     p.update({"ch_template_placeholder": "{{ch_template_placeholder}}"})
                 template = AdversarialTemplate(template_name=key, text=None, context_key=[], template_parameters=params)
                 ch_templates.append(template)
         return ch_templates
     def get_template(self, template_name: str) -> Optional[AdversarialTemplate]:

azure/ai/evaluation/simulator/_model_tools/models.py CHANGED Viewed

@@ -12,6 +12,8 @@ from abc import ABC, abstractmethod
 from collections import deque
 from typing import Deque, Dict, List, Optional, Union
 from urllib.parse import urlparse
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
+from ._rai_client import RAIClient
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
@@ -78,7 +80,7 @@ class LLMBase(ABC):
     async def get_completion(
         self,
         prompt: str,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         **request_params,
     ) -> dict:
         """
@@ -100,7 +102,7 @@ class LLMBase(ABC):
     async def get_all_completions(
         self,
         prompts: List[str],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_max_parallel_count: int,
         api_call_delay_seconds: float,
         request_error_rate_threshold: float,
@@ -120,7 +122,7 @@ class LLMBase(ABC):
     async def get_conversation_completion(
         self,
         messages: List[dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str,
         **request_params,
     ) -> dict:
@@ -274,7 +276,7 @@ class OpenAICompletionsModel(LLMBase):
     async def get_conversation_completion(
         self,
         messages: List[dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str = "assistant",
         **request_params,
     ) -> dict:
@@ -304,7 +306,7 @@ class OpenAICompletionsModel(LLMBase):
     async def get_all_completions(  # type: ignore[override]
         self,
         prompts: List[Dict[str, str]],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_max_parallel_count: int = 1,
         api_call_delay_seconds: float = 0.1,
         request_error_rate_threshold: float = 0.5,
@@ -372,7 +374,7 @@ class OpenAICompletionsModel(LLMBase):
         self,
         request_datas: List[dict],
         output_collector: List,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_delay_seconds: float = 0.1,
         request_error_rate_threshold: float = 0.5,
     ) -> None:
@@ -433,7 +435,7 @@ class OpenAICompletionsModel(LLMBase):
     async def request_api(
         self,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         request_data: dict,
     ) -> dict:
         """
@@ -476,11 +478,12 @@ class OpenAICompletionsModel(LLMBase):
         time_start = time.time()
         full_response = None
-        response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
-        response.raise_for_status()
-        response_data = response.json()
+        if isinstance(session, AIProjectClient):
+            response_data = session.red_teams.submit_simulation(request_data, headers, params)
+        else:
+            response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
+            response.raise_for_status()
+            response_data = response.json()
         self.logger.info(f"Response: {response_data}")
@@ -533,7 +536,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
     async def get_conversation_completion(
         self,
         messages: List[dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str = "assistant",
         **request_params,
     ) -> dict:
@@ -544,7 +547,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
         ----------
         messages: List of messages to query the model with.
         Expected format: [{"role": "user", "content": "Hello!"}, ...]
-        session: AsyncHttpPipeline object to query the model with.
+        session: Union[AsyncHttpPipeline, AIProjectClient] object to query the model with.
         role: Not used for this model, since it is a chat model.
         request_params: Additional parameters to pass to the model.
         """
@@ -560,7 +563,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
     async def get_completion(
         self,
         prompt: str,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         **request_params,
     ) -> dict:
         """
@@ -569,7 +572,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
         Parameters
         ----------
         prompt: Prompt str to query model with.
-        session: AsyncHttpPipeline object to use for the request.
+        session: Union[AsyncHttpPipeline, AIProjectClient] object to use for the request.
         **request_params: Additional parameters to pass to the request.
         """
         messages = [{"role": "system", "content": prompt}]
@@ -583,7 +586,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
     async def get_all_completions(
         self,
         prompts: List[str],  # type: ignore[override]
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_max_parallel_count: int = 1,
         api_call_delay_seconds: float = 0.1,
         request_error_rate_threshold: float = 0.5,

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl