PyPI - azure-ai-evaluation - Versions diffs - 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl - Mend - Supply Chain Defender

azure-ai-evaluation 1.0.0b2-py3-none-any.whl → 1.13.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299)

azure/ai/evaluation/simulator/_adversarial_simulator.py CHANGED Viewed

@@ -6,79 +6,108 @@
 import asyncio
 import logging
 import random
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union, cast
+import uuid
+import warnings
 from tqdm import tqdm
+from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
-from azure.ai.evaluation.simulator import AdversarialScenario
+from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
 from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
+from azure.ai.evaluation._constants import TokenScope
+from azure.core.credentials import TokenCredential
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
-from azure.identity import DefaultAzureCredential
 from ._constants import SupportedLanguages
-from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
+from ._conversation import (
+    CallbackConversationBot,
+    MultiModalConversationBot,
+    ConversationBot,
+    ConversationRole,
+    ConversationTurn,
+)
 from ._conversation._conversation import simulate_conversation
 from ._model_tools import (
     AdversarialTemplateHandler,
     ManagedIdentityAPITokenManager,
     ProxyChatCompletionsModel,
     RAIClient,
-    TokenScope,
 )
+from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
 from ._utils import JsonLineList
 logger = logging.getLogger(__name__)
+@experimental
 class AdversarialSimulator:
     """
     Initializes the adversarial simulator with a project scope.
-    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
-        name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, AzureAIProject]
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
+    .. admonition:: Example:
+        .. literalinclude:: ../samples/evaluation_samples_simulate.py
+            :start-after: [START adversarial_scenario]
+            :end-before: [END adversarial_scenario]
+            :language: python
+            :dedent: 8
+            :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
+                2 conversation turns each (4 messages per result).
     """
-    def __init__(self, *, azure_ai_project: AzureAIProject, credential=None):
+    def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
         """Constructor."""
-        # check if azure_ai_project has the keys: subscription_id, resource_group_name and project_name
-        if not all(key in azure_ai_project for key in ["subscription_id", "resource_group_name", "project_name"]):
-            msg = "azure_ai_project must contain keys: subscription_id, resource_group_name, project_name"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.ADVERSARIAL_SIMULATOR,
-                category=ErrorCategory.MISSING_FIELD,
-                blame=ErrorBlame.USER_ERROR,
+        warnings.warn(
+            "DEPRECATION NOTE: Azure AI Evaluation SDK has discontinued active development on the AdversarialSimulator class."
+            + " While existing functionality remains available in preview, it is no longer recommended for production workloads or future integration. "
+            + "We recommend users migrate to the AI Red Teaming Agent for future use as it supports full parity of functionality."
+            + " See https://aka.ms/airedteamingagent-sample for details on AI Red Teaming Agent.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        if is_onedp_project(azure_ai_project):
+            self.azure_ai_project = azure_ai_project
+            self.credential = cast(TokenCredential, credential)
+            self.token_manager = ManagedIdentityAPITokenManager(
+                token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
+                logger=logging.getLogger("AdversarialSimulator"),
+                credential=self.credential,
             )
-        # check the value of the keys in azure_ai_project is not none
-        if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
-            msg = "subscription_id, resource_group_name and project_name cannot be None"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.ADVERSARIAL_SIMULATOR,
-                category=ErrorCategory.MISSING_FIELD,
-                blame=ErrorBlame.USER_ERROR,
+            self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
+        else:
+            try:
+                self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
+            except EvaluationException as e:
+                raise EvaluationException(
+                    message=e.message,
+                    internal_message=e.internal_message,
+                    target=ErrorTarget.ADVERSARIAL_SIMULATOR,
+                    category=e.category,
+                    blame=e.blame,
+                ) from e
+            self.credential = cast(TokenCredential, credential)
+            self.token_manager = ManagedIdentityAPITokenManager(
+                token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
+                logger=logging.getLogger("AdversarialSimulator"),
+                credential=self.credential,
             )
-        if "credential" not in azure_ai_project and not credential:
-            credential = DefaultAzureCredential()
-        elif "credential" in azure_ai_project:
-            credential = azure_ai_project["credential"]
-        self.azure_ai_project = azure_ai_project
-        self.token_manager = ManagedIdentityAPITokenManager(
-            token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
-            logger=logging.getLogger("AdversarialSimulator"),
-            credential=credential,
-        )
-        self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
+            self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
         self.adversarial_template_handler = AdversarialTemplateHandler(
-            azure_ai_project=azure_ai_project, rai_client=self.rai_client
+            azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
         )
     def _ensure_service_dependencies(self):
@@ -92,7 +121,7 @@ class AdversarialSimulator:
                 blame=ErrorBlame.USER_ERROR,
             )
-    # @monitor_adversarial_scenario
+    # pylint: disable=too-many-locals
     async def __call__(
         self,
         *,
@@ -106,10 +135,10 @@ class AdversarialSimulator:
         api_call_retry_sleep_sec: int = 1,
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
-        _jailbreak_type: Optional[str] = None,
         language: SupportedLanguages = SupportedLanguages.English,
         randomize_order: bool = True,
         randomization_seed: Optional[int] = None,
+        **kwargs,
     ):
         """
         Executes the adversarial simulation against a specified target function asynchronously.
@@ -159,28 +188,6 @@ class AdversarialSimulator:
          The 'content' for 'assistant' role messages may includes the messages that your callback returned.
         :rtype: List[Dict[str, Any]]
-        **Output format**
-        .. code-block:: python
-            return_value = [
-                {
-                    'template_parameters': {},
-                    'messages': [
-                        {
-                            'content': '<jailbreak prompt> <adversarial query>',
-                            'role': 'user'
-                        },
-                        {
-                            'content': "<response from endpoint>",
-                            'role': 'assistant',
-                            'context': None
-                        }
-                    ],
-                    '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
-                }
-            ]
         """
         # validate the inputs
@@ -202,6 +209,14 @@ class AdversarialSimulator:
             )
         self._ensure_service_dependencies()
         templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
+        if len(templates) == 0:
+            raise EvaluationException(
+                message="Templates not found. Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
+                internal_message="Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
+                target=ErrorTarget.ADVERSARIAL_SIMULATOR,
+            )
+        simulation_id = str(uuid.uuid4())
+        logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
         concurrent_async_task = min(concurrent_async_task, 1000)
         semaphore = asyncio.Semaphore(concurrent_async_task)
         sim_results = []
@@ -216,46 +231,85 @@ class AdversarialSimulator:
                 total_tasks,
             )
         total_tasks = min(total_tasks, max_simulation_results)
+        _jailbreak_type = kwargs.get("_jailbreak_type", None)
         if _jailbreak_type:
-            jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
+            if isinstance(self.rai_client, RAIClient):
+                jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
+            elif isinstance(self.rai_client, AIProjectClient):
+                jailbreak_dataset = self.rai_client.red_teams.get_jail_break_dataset_with_type(type=_jailbreak_type)
         progress_bar = tqdm(
             total=total_tasks,
             desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
             ncols=100,
             unit="simulations",
         )
-        for template in templates:
-            parameter_order = list(range(len(template.template_parameters)))
-            if randomize_order:
-                # The template parameter lists are persistent across sim runs within a session,
-                # So randomize a the selection instead of the parameter list directly,
-                # or a potentially large deep copy.
-                if randomization_seed is not None:
-                    random.seed(randomization_seed)
-                random.shuffle(parameter_order)
-            for index in parameter_order:
-                parameter = template.template_parameters[index].copy()
-                if _jailbreak_type == "upia":
-                    parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
-                tasks.append(
-                    asyncio.create_task(
-                        self._simulate_async(
-                            target=target,
-                            template=template,
-                            parameters=parameter,
-                            max_conversation_turns=max_conversation_turns,
-                            api_call_retry_limit=api_call_retry_limit,
-                            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
-                            api_call_delay_sec=api_call_delay_sec,
-                            language=language,
-                            semaphore=semaphore,
-                        )
+        if randomize_order:
+            # The template parameter lists are persistent across sim runs within a session,
+            # So randomize a the selection instead of the parameter list directly,
+            # or a potentially large deep copy.
+            if randomization_seed is not None:
+                # Create a local random instance to avoid polluting global state
+                local_random = random.Random(randomization_seed)
+                local_random.shuffle(templates)
+            else:
+                random.shuffle(templates)
+        # Prepare task parameters based on scenario - but use a single append call for all scenarios
+        tasks = []
+        template_parameter_pairs = []
+        if scenario == AdversarialScenario.ADVERSARIAL_CONVERSATION:
+            # For ADVERSARIAL_CONVERSATION, flatten the parameters
+            for i, template in enumerate(templates):
+                if not template.template_parameters:
+                    continue
+                for parameter in template.template_parameters:
+                    template_parameter_pairs.append((template, parameter))
+        else:
+            # Use original logic for other scenarios - zip parameters
+            parameter_lists = [t.template_parameters for t in templates]
+            zipped_parameters = list(zip(*parameter_lists))
+            for param_group in zipped_parameters:
+                for template, parameter in zip(templates, param_group):
+                    template_parameter_pairs.append((template, parameter))
+        # Limit to max_simulation_results if needed
+        if len(template_parameter_pairs) > max_simulation_results:
+            template_parameter_pairs = template_parameter_pairs[
+                :max_simulation_results
+            ]  # Create a seeded random instance for jailbreak selection if randomization_seed is provided
+        jailbreak_random = None
+        if _jailbreak_type == "upia" and randomization_seed is not None:
+            jailbreak_random = random.Random(randomization_seed)
+        # Single task append loop for all scenarios
+        for template, parameter in template_parameter_pairs:
+            if _jailbreak_type == "upia":
+                if jailbreak_random is not None:
+                    selected_jailbreak = jailbreak_random.choice(jailbreak_dataset)
+                else:
+                    selected_jailbreak = random.choice(jailbreak_dataset)
+                parameter = self._add_jailbreak_parameter(parameter, selected_jailbreak)
+            tasks.append(
+                asyncio.create_task(
+                    self._simulate_async(
+                        target=target,
+                        template=template,
+                        parameters=parameter,
+                        max_conversation_turns=max_conversation_turns,
+                        api_call_retry_limit=api_call_retry_limit,
+                        api_call_retry_sleep_sec=api_call_retry_sleep_sec,
+                        api_call_delay_sec=api_call_delay_sec,
+                        language=language,
+                        semaphore=semaphore,
+                        scenario=scenario,
+                        simulation_id=simulation_id,
                     )
                 )
-                if len(tasks) >= max_simulation_results:
-                    break
-            if len(tasks) >= max_simulation_results:
-                break
+            )
         for task in asyncio.as_completed(tasks):
             sim_results.append(await task)
             progress_bar.update(1)
@@ -263,16 +317,21 @@ class AdversarialSimulator:
         return JsonLineList(sim_results)
-    def _to_chat_protocol(self, *, conversation_history, template_parameters: Dict = None):
+    def _to_chat_protocol(
+        self,
+        *,
+        conversation_history: List[ConversationTurn],
+        template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
+    ):
         if template_parameters is None:
             template_parameters = {}
         messages = []
         for _, m in enumerate(conversation_history):
             message = {"content": m.message, "role": m.role.value}
-            if "context" in m.full_response:
+            if m.full_response is not None and "context" in m.full_response:
                 message["context"] = m.full_response["context"]
             messages.append(message)
-        conversation_category = template_parameters.pop("metadata", {}).get("Category")
+        conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
         template_parameters["metadata"] = {}
         for key in (
             "conversation_starter",
@@ -280,6 +339,9 @@ class AdversarialSimulator:
             "target_population",
             "topic",
             "ch_template_placeholder",
+            "chatbot_name",
+            "name",
+            "group",
         ):
             template_parameters.pop(key, None)
         if conversation_category:
@@ -294,54 +356,92 @@ class AdversarialSimulator:
         self,
         *,
         target: Callable,
-        template,
-        parameters,
-        max_conversation_turns,
-        api_call_retry_limit,
-        api_call_retry_sleep_sec,
-        api_call_delay_sec,
-        language,
-        semaphore,
+        template: AdversarialTemplate,
+        parameters: TemplateParameters,
+        max_conversation_turns: int,
+        api_call_retry_limit: int,
+        api_call_retry_sleep_sec: int,
+        api_call_delay_sec: int,
+        language: SupportedLanguages,
+        semaphore: asyncio.Semaphore,
+        scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
+        simulation_id: str = "",
     ) -> List[Dict]:
-        user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
+        user_bot = self._setup_bot(
+            role=ConversationRole.USER,
+            template=template,
+            parameters=parameters,
+            scenario=scenario,
+            simulation_id=simulation_id,
+        )
         system_bot = self._setup_bot(
-            target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
+            target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
         )
         bots = [user_bot, system_bot]
-        session = get_async_http_client().with_policies(
-            retry_policy=AsyncRetryPolicy(
-                retry_total=api_call_retry_limit,
-                retry_backoff_factor=api_call_retry_sleep_sec,
-                retry_mode=RetryMode.Fixed,
-            )
-        )
-        async with semaphore, session:
-            _, conversation_history = await simulate_conversation(
-                bots=bots,
-                session=session,
-                turn_limit=max_conversation_turns,
-                api_call_delay_sec=api_call_delay_sec,
-                language=language,
+        async def run_simulation(session_obj):
+            async with semaphore:
+                _, conversation_history = await simulate_conversation(
+                    bots=bots,
+                    session=session_obj,
+                    turn_limit=max_conversation_turns,
+                    api_call_delay_sec=api_call_delay_sec,
+                    language=language,
+                )
+            return conversation_history
+        if isinstance(self.rai_client, AIProjectClient):
+            session = self.rai_client
+        else:
+            session = get_async_http_client().with_policies(
+                retry_policy=AsyncRetryPolicy(
+                    retry_total=api_call_retry_limit,
+                    retry_backoff_factor=api_call_retry_sleep_sec,
+                    retry_mode=RetryMode.Fixed,
+                )
             )
-        return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)
+        conversation_history = await run_simulation(session)
+        return self._to_chat_protocol(
+            conversation_history=conversation_history,
+            template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
+        )
-    def _get_user_proxy_completion_model(self, template_key, template_parameters):
+    def _get_user_proxy_completion_model(
+        self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
+    ) -> ProxyChatCompletionsModel:
+        endpoint_url = (
+            self.rai_client._config.endpoint + "/redTeams/simulation/chat/completions/submit"
+            if isinstance(self.rai_client, AIProjectClient)
+            else self.rai_client.simulation_submit_endpoint
+        )
         return ProxyChatCompletionsModel(
             name="raisvc_proxy_model",
             template_key=template_key,
             template_parameters=template_parameters,
-            endpoint_url=self.rai_client.simulation_submit_endpoint,
+            endpoint_url=endpoint_url,
             token_manager=self.token_manager,
             api_version="2023-07-01-preview",
             max_tokens=1200,
             temperature=0.0,
+            simulation_id=simulation_id,
         )
-    def _setup_bot(self, *, role, template, parameters, target: Callable = None):
-        if role == ConversationRole.USER:
+    def _setup_bot(
+        self,
+        *,
+        role: ConversationRole,
+        template: AdversarialTemplate,
+        parameters: TemplateParameters,
+        target: Optional[Callable] = None,
+        scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
+        simulation_id: str = "",
+    ) -> ConversationBot:
+        if role is ConversationRole.USER:
             model = self._get_user_proxy_completion_model(
-                template_key=template.template_name, template_parameters=parameters
+                template_key=template.template_name,
+                template_parameters=parameters,
+                simulation_id=simulation_id,
             )
             return ConversationBot(
                 role=role,
@@ -350,35 +450,61 @@ class AdversarialSimulator:
                 instantiation_parameters=parameters,
             )
-        if role == ConversationRole.ASSISTANT:
+        if role is ConversationRole.ASSISTANT:
+            if target is None:
+                msg = "Cannot setup system bot. Target is None"
-            def dummy_model() -> None:
-                return None
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    target=ErrorTarget.ADVERSARIAL_SIMULATOR,
+                    error_category=ErrorCategory.INVALID_VALUE,
+                    blame=ErrorBlame.SYSTEM_ERROR,
+                )
+            class DummyModel:
+                def __init__(self):
+                    self.name = "dummy_model"
+                def __call__(self) -> None:
+                    pass
+            if scenario in [
+                _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_GEN,
+                _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_MULTIMODAL,
+            ]:
+                return MultiModalConversationBot(
+                    callback=target,
+                    role=role,
+                    model=DummyModel(),
+                    user_template=str(template),
+                    user_template_parameters=parameters,
+                    rai_client=self.rai_client,
+                    conversation_template="",
+                    instantiation_parameters={},
+                )
-            dummy_model.name = "dummy_model"
             return CallbackConversationBot(
                 callback=target,
                 role=role,
-                model=dummy_model,
+                model=DummyModel(),
                 user_template=str(template),
                 user_template_parameters=parameters,
                 conversation_template="",
                 instantiation_parameters={},
             )
-        return ConversationBot(
-            role=role,
-            model=model,
-            conversation_template=template,
-            instantiation_parameters=parameters,
-        )
-    def _join_conversation_starter(self, parameters, to_join):
-        key = "conversation_starter"
-        if key in parameters.keys():
-            parameters[key] = f"{to_join} {parameters[key]}"
-        else:
-            parameters[key] = to_join
+        msg = "Invalid value for enum ConversationRole. This should never happen."
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.ADVERSARIAL_SIMULATOR,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.SYSTEM_ERROR,
+        )
+    def _add_jailbreak_parameter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
+        parameters["jailbreak_string"] = to_join
         return parameters
     def call_sync(

azure/ai/evaluation/simulator/_constants.py CHANGED Viewed

@@ -5,7 +5,17 @@ from enum import Enum
 class SupportedLanguages(Enum):
-    """Supported languages for evaluation, using ISO standard language codes."""
+    """Supported languages for evaluation, using ISO standard language codes.
+    .. admonition:: Example:
+        .. literalinclude:: ../samples/evaluation_samples_simulate.py
+            :start-after: [START supported_languages]
+            :end-before: [END supported_languages]
+            :language: python
+            :dedent: 8
+            :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
+    """
     Spanish = "es"
     Italian = "it"
@@ -15,3 +25,4 @@ class SupportedLanguages(Enum):
     Portuguese = "pt"
     Japanese = "ja"
     English = "en"
+    Korean = "ko"