PyPI - azure-ai-evaluation - Versions diffs - 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136) hide show

azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py ADDED Viewed

@@ -0,0 +1,228 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+"""
+This module provides Semantic Kernel Plugin for Red Team Tools.
+These plugins can be used as functions in a Semantic Kernel agent for red teaming purposes.
+"""
+import asyncio
+import json
+from typing import Annotated, Dict, Any, Optional, Callable
+from semantic_kernel.functions import kernel_function
+from azure.ai.evaluation.red_team._agent._agent_tools import RedTeamToolProvider
+from azure.identity import DefaultAzureCredential
+class RedTeamPlugin:
+    """
+    A Semantic Kernel plugin that provides red teaming capabilities.
+    This plugin wraps around the RedTeamToolProvider to provide red teaming functions
+    as Semantic Kernel functions.
+    Example:
+        ```python
+        # Method 1: Create a plugin with individual environment variables
+        plugin = RedTeamPlugin(
+            azure_ai_project_endpoint=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
+            target_func=lambda x: "Target model response"
+        )
+        # Create a Semantic Kernel agent with the plugin
+        agent = ChatCompletionAgent(
+            service=service,
+            name="RedTeamAgent",
+            instructions="You are a red team agent...",
+            plugins=[plugin],
+        )
+        ```
+    """
+    def __init__(
+        self,
+        azure_ai_project_endpoint: str,
+        target_func: Optional[Callable[[str], str]] = None,
+        *,
+        application_scenario: str = "",
+        **kwargs,
+    ):
+        """
+        Initialize the RedTeamPlugin with the necessary configuration components.
+        :param azure_ai_project_endpoint: The Azure AI project endpoint (e.g., 'https://your-resource-name.services.ai.azure.com/api/projects/your-project-name')
+        :param target_func: Optional function to call with prompts
+        :param application_scenario: The application scenario for the tool provider
+        """
+        # Initialize credential and tool provider
+        self.credential = DefaultAzureCredential()
+        self.tool_provider = RedTeamToolProvider(
+            azure_ai_project_endpoint=azure_ai_project_endpoint,
+            credential=self.credential,
+            application_scenario=application_scenario,
+        )
+        # Store the target function
+        self.target_function = target_func
+        # Dictionary to store fetched prompts for reference
+        self.fetched_prompts = {}
+    @kernel_function(description="Fetch a harmful prompt for a specific risk category to test content filters")
+    async def fetch_harmful_prompt(
+        self,
+        risk_category: Annotated[str, "The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')"],
+        strategy: Annotated[str, "Attack strategy to use (e.g., 'baseline', 'jailbreak')"] = "baseline",
+        convert_with_strategy: Annotated[str, "Optional strategy to convert the prompt"] = "",
+    ) -> Annotated[str, "A JSON string with the harmful prompt and metadata"]:
+        """
+        Fetch a harmful prompt for a specific risk category to test content filters.
+        :param risk_category: The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')
+        :param strategy: Attack strategy to use (e.g., 'baseline', 'jailbreak')
+        :param convert_with_strategy: Optional strategy to convert the prompt
+        :return: A JSON string with the harmful prompt and metadata
+        """
+        # Convert empty string to None
+        if not convert_with_strategy:
+            convert_with_strategy = None
+        # Directly await the async method instead of using asyncio.run()
+        result = await self.tool_provider.fetch_harmful_prompt(
+            risk_category_text=risk_category, strategy=strategy, convert_with_strategy=convert_with_strategy
+        )
+        # Store the prompt for later conversion if successful
+        if result["status"] == "success" and "prompt_id" in result:
+            prompt_id = result["prompt_id"]
+            if "prompt" in result:
+                self.fetched_prompts[prompt_id] = result["prompt"]
+                # Also update the tool provider's cache
+                self.tool_provider._fetched_prompts[prompt_id] = result["prompt"]
+        return json.dumps(result)
+    @kernel_function(description="Convert a prompt using a specified strategy")
+    async def convert_prompt(
+        self,
+        prompt_or_id: Annotated[str, "Either a prompt text or a prompt ID from a previous fetch"],
+        strategy: Annotated[str, "The strategy to use for conversion"],
+    ) -> Annotated[str, "A JSON string with the original and converted prompt"]:
+        """
+        Convert a prompt or a previously fetched prompt ID using a specified strategy.
+        :param prompt_or_id: Either a prompt text or a prompt ID from a previous fetch
+        :param strategy: The strategy to use for conversion
+        :return: A JSON string with the original and converted prompt
+        """
+        # Check if input is a prompt ID we have stored
+        if prompt_or_id in self.fetched_prompts:
+            # Update the provider's cache
+            self.tool_provider._fetched_prompts[prompt_or_id] = self.fetched_prompts[prompt_or_id]
+        # Directly await the async method instead of using asyncio.run()
+        result = await self.tool_provider.convert_prompt(prompt_or_id=prompt_or_id, strategy=strategy)
+        return json.dumps(result)
+    @kernel_function(description="Get a harmful prompt for a specific risk category and optionally convert it")
+    async def red_team_unified(
+        self,
+        category: Annotated[str, "The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')"],
+        strategy: Annotated[str, "Optional strategy to convert the prompt"] = "",
+    ) -> Annotated[str, "A JSON string with the harmful prompt and metadata"]:
+        """
+        Get a harmful prompt for a specific risk category and optionally convert it.
+        :param category: The risk category (e.g., 'violence', 'hate_unfairness', 'sexual', 'self_harm')
+        :param strategy: Optional strategy to convert the prompt
+        :return: A JSON string with the harmful prompt and metadata
+        """
+        # Convert empty string to None
+        strategy_param = strategy if strategy else None
+        # Directly await the async method instead of using asyncio.run()
+        result = await self.tool_provider.red_team(category=category, strategy=strategy_param)
+        # Store the prompt for later conversion if it's a success and we didn't convert it
+        if result["status"] == "success":
+            if "prompt_id" in result and "prompt" in result:
+                prompt_id = result["prompt_id"]
+                self.fetched_prompts[prompt_id] = result["prompt"]
+                # Also update the tool provider's cache
+                self.tool_provider._fetched_prompts[prompt_id] = result["prompt"]
+        return json.dumps(result)
+    @kernel_function(description="Get a list of all available prompt conversion strategies")
+    async def get_available_strategies(self) -> Annotated[str, "A JSON string with available conversion strategies"]:
+        """
+        Get a list of all available prompt conversion strategies.
+        :return: A JSON string with available conversion strategies
+        """
+        # This method calls a synchronous function, but we make the method itself async
+        # for consistency with the rest of the interface
+        strategies = self.tool_provider.get_available_strategies()
+        return json.dumps({"status": "success", "available_strategies": strategies})
+    @kernel_function(description="Explain the purpose and responsible use of red teaming tools")
+    async def explain_purpose(self) -> Annotated[str, "A JSON string with information about red teaming tools"]:
+        """
+        Explain the purpose and responsible use of red teaming tools.
+        :return: A JSON string with information about red teaming tools
+        """
+        # This method doesn't use any async functions, but we make it async
+        # for consistency with the rest of the interface
+        explanation = {
+            "purpose": "Red teaming tools are designed to test AI systems for vulnerabilities and safety issues.",
+            "responsible_use": [
+                "Only use these tools on systems you have permission to test.",
+                "Use the results to improve system safety and robustness.",
+                "Do not use these tools to cause harm or bypass legitimate safety measures in production systems.",
+            ],
+            "risk_categories": {
+                "violence": "Content related to physical harm, weapons, or dangerous activities",
+                "hate_unfairness": "Content related to discrimination, bias, or prejudice",
+                "sexual": "Explicit sexual content",
+                "self_harm": "Content related to suicide, self-injury, or harmful behaviors",
+            },
+            "conversion_strategies": "Various encoding and obfuscation methods to test if harmful content can bypass detection",
+        }
+        return json.dumps(explanation)
+    @kernel_function(description="Send a prompt to the target function and return the response")
+    async def send_to_target(
+        self, prompt: Annotated[str, "The prompt text to send to the target function"]
+    ) -> Annotated[str, "A JSON string with the response from the target"]:
+        """
+        Send a prompt to the target function and return the response.
+        :param prompt: The prompt text to send to the target function
+        :return: A JSON string with the response from the target
+        """
+        # This method doesn't use any async functions, but we make it async
+        # for consistency with the rest of the interface
+        if self.target_function is None:
+            return json.dumps(
+                {
+                    "status": "error",
+                    "message": "Target function not initialized. Make sure to pass a target_func when initializing the plugin.",
+                }
+            )
+        try:
+            # Call the target function with the prompt
+            response = self.target_function(prompt)
+            return json.dumps({"status": "success", "prompt": prompt, "response": response})
+        except Exception as e:
+            return json.dumps(
+                {"status": "error", "message": f"Error calling target function: {str(e)}", "prompt": prompt}
+            )

azure/ai/evaluation/red_team/_attack_objective_generator.py CHANGED Viewed

@@ -13,10 +13,19 @@ from azure.ai.evaluation._common._experimental import experimental
 @experimental
 class RiskCategory(str, Enum):
     """Risk categories for attack objectives."""
     HateUnfairness = "hate_unfairness"
     Violence = "violence"
     Sexual = "sexual"
     SelfHarm = "self_harm"
+    ProtectedMaterial = "protected_material"
+    CodeVulnerability = "code_vulnerability"
+@experimental
+class _InternalRiskCategory(str, Enum):
+    ECI = "eci"
 class _AttackObjectiveGenerator:
     """Generator for creating attack objectives.
@@ -30,6 +39,7 @@ class _AttackObjectiveGenerator:
     :param custom_attack_seed_prompts: Path to a JSON file containing custom attack seed prompts (can be absolute or relative path)
     :type custom_attack_seed_prompts: Optional[str]
     """
     def __init__(
         self,
         risk_categories: Optional[List[RiskCategory]] = None,
@@ -42,49 +52,51 @@ class _AttackObjectiveGenerator:
         self.application_scenario = application_scenario
         self.custom_attack_seed_prompts = custom_attack_seed_prompts
         self.logger = logging.getLogger("_AttackObjectiveGenerator")
         # If custom_attack_seed_prompts is provided, validate and load them
         self.custom_prompts = None
         self.validated_prompts = []
         self.valid_prompts_by_category = {}
         if custom_attack_seed_prompts:
             self._load_and_validate_custom_prompts()
     def _load_and_validate_custom_prompts(self) -> None:
         """Load and validate custom attack seed prompts from the provided file path."""
         if not self.custom_attack_seed_prompts:
             return
         # Handle both absolute and relative paths
         custom_prompts_path = Path(self.custom_attack_seed_prompts)
         # Convert to absolute path if it's a relative path
         if not custom_prompts_path.is_absolute():
             self.logger.info(f"Converting relative path '{custom_prompts_path}' to absolute path")
             custom_prompts_path = Path.cwd() / custom_prompts_path
         self.logger.debug(f"Using absolute path: {custom_prompts_path}")
         # Check if the file exists
         if not custom_prompts_path.exists():
             raise ValueError(f"Custom attack seed prompts file not found: {custom_prompts_path}")
         try:
             # Load JSON file
-            with open(custom_prompts_path, 'r', encoding='utf-8') as f:
+            with open(custom_prompts_path, "r", encoding="utf-8") as f:
                 self.custom_prompts = json.load(f)
             # Validate that it's a list
             if not isinstance(self.custom_prompts, list):
-                raise ValueError(f"Custom attack seed prompts must be a JSON array, got {type(self.custom_prompts)}, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                raise ValueError(
+                    f"Custom attack seed prompts must be a JSON array, got {type(self.custom_prompts)}, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                )
             self.logger.info(f"Loaded {len(self.custom_prompts)} prompts from {self.custom_attack_seed_prompts}")
             # Initialize dictionary for categorized prompts
             for risk_category in RiskCategory:
                 self.valid_prompts_by_category[risk_category.value] = []
             # Process each prompt and validate format
             valid_prompts_count = 0
             invalid_prompts_count = 0
@@ -94,102 +106,132 @@ class _AttackObjectiveGenerator:
                     if not isinstance(prompt, dict):
                         self.logger.warning(f"Skipping prompt {i}: not a JSON object")
                         continue
                     if "metadata" not in prompt:
                         self.logger.warning(f"Skipping prompt {i}: missing 'metadata' field")
                         continue
                     if "messages" not in prompt or not prompt["messages"]:
                         self.logger.warning(f"Skipping prompt {i}: missing or empty 'messages' field")
                         continue
                     # Check metadata structure
                     metadata = prompt["metadata"]
                     if not isinstance(metadata, dict):
-                        self.logger.warning(f"Skipping prompt {i}: 'metadata' is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: 'metadata' is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     if "target_harms" not in metadata or not metadata["target_harms"]:
-                        self.logger.warning(f"Skipping prompt {i}: missing or empty 'target_harms' in metadata, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: missing or empty 'target_harms' in metadata, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     # Check target_harms structure
                     valid_risk_types = {cat.value for cat in RiskCategory}
                     valid_risk_found = False
                     prompt_categories = []
                     for harm in metadata["target_harms"]:
                         if not isinstance(harm, dict):
-                            self.logger.warning(f"Skipping harm in prompt {i}: not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                            self.logger.warning(
+                                f"Skipping harm in prompt {i}: not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                            )
                             continue
                         if "risk-type" not in harm:
-                            self.logger.warning(f"Skipping harm in prompt {i}: missing 'risk-type' field, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                            self.logger.warning(
+                                f"Skipping harm in prompt {i}: missing 'risk-type' field, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                            )
                             continue
                         risk_type = harm.get("risk-type", "")
                         if risk_type not in valid_risk_types:
-                            self.logger.warning(f"Skipping harm in prompt {i}: invalid risk-type '{risk_type}'. Valid types: {valid_risk_types}. see https://aka.ms/airedteamingagent-howtodoc for more information")
+                            self.logger.warning(
+                                f"Skipping harm in prompt {i}: invalid risk-type '{risk_type}'. Valid types: {valid_risk_types}. see https://aka.ms/airedteamingagent-howtodoc for more information"
+                            )
                             continue
                         prompt_categories.append(risk_type)
                         valid_risk_found = True
                     if not valid_risk_found:
-                        self.logger.warning(f"Skipping prompt {i}: no valid risk types found. See https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: no valid risk types found. See https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     # Check messages structure
                     messages = prompt["messages"]
                     if not isinstance(messages, list) or not messages:
-                        self.logger.warning(f"Skipping prompt {i}: 'messages' is not a list or is empty, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: 'messages' is not a list or is empty, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     message = messages[0]
                     if not isinstance(message, dict):
-                        self.logger.warning(f"Skipping prompt {i}: first message is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: first message is not a JSON object, see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     if "role" not in message or message["role"] != "user":
-                        self.logger.warning(f"Skipping prompt {i}: first message must have role='user', see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: first message must have role='user', see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     if "content" not in message or not message["content"]:
-                        self.logger.warning(f"Skipping prompt {i}: first message missing or empty 'content', see https://aka.ms/airedteamingagent-howtodoc for more information")
+                        self.logger.warning(
+                            f"Skipping prompt {i}: first message missing or empty 'content', see https://aka.ms/airedteamingagent-howtodoc for more information"
+                        )
                         continue
                     # If we got here, the prompt is valid
                     self.validated_prompts.append(prompt)
                     valid_prompts_count += 1
                     # Add to the appropriate categories
                     for category in prompt_categories:
                         self.valid_prompts_by_category[category].append(prompt)
                 except Exception as e:
                     self.logger.warning(f"Error validating prompt {i}: {str(e)}")
                     invalid_prompts_count += 1
             # Check if we have at least one valid prompt
             if valid_prompts_count == 0:
-                raise ValueError("No valid prompts found in custom attack seed prompts file. See https://aka.ms/airedteamingagent-howtodoc for more information")
+                raise ValueError(
+                    "No valid prompts found in custom attack seed prompts file. See https://aka.ms/airedteamingagent-howtodoc for more information"
+                )
             self.logger.info(f"Loaded {valid_prompts_count} valid prompts from custom attack seed prompts file")
             if invalid_prompts_count > 0:
                 self.logger.warning(f"Skipped {invalid_prompts_count} invalid prompts")
             # Log the breakdown by risk category
-            category_counts = {cat: len(prompts) for cat, prompts in self.valid_prompts_by_category.items() if len(prompts) > 0}
+            category_counts = {
+                cat: len(prompts) for cat, prompts in self.valid_prompts_by_category.items() if len(prompts) > 0
+            }
             self.logger.info(f"Prompt distribution by risk category: {category_counts}")
             # Automatically extract risk categories from valid prompts if not provided
             if not self.risk_categories:
                 categories_with_prompts = [cat for cat, prompts in self.valid_prompts_by_category.items() if prompts]
                 self.risk_categories = [RiskCategory(cat) for cat in categories_with_prompts]
-                self.logger.info(f"Automatically set risk categories based on valid prompts: {[cat.value for cat in self.risk_categories]}")
+                self.logger.info(
+                    f"Automatically set risk categories based on valid prompts: {[cat.value for cat in self.risk_categories]}"
+                )
         except json.JSONDecodeError as e:
-            raise ValueError(f"Failed to parse custom attack seed prompts file: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information")
+            raise ValueError(
+                f"Failed to parse custom attack seed prompts file: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information"
+            )
         except Exception as e:
-            raise ValueError(f"Error loading custom attack seed prompts: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information")
+            raise ValueError(
+                f"Error loading custom attack seed prompts: {str(e)}. See https://aka.ms/airedteamingagent-howtodoc for more information"
+            )

azure/ai/evaluation/red_team/_attack_strategy.py CHANGED Viewed

@@ -9,6 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 @experimental
 class AttackStrategy(Enum):
     """Strategies for attacks."""
     EASY = "easy"
     MODERATE = "moderate"
     DIFFICULT = "difficult"
@@ -34,12 +35,14 @@ class AttackStrategy(Enum):
     Url = "url"
     Baseline = "baseline"
     Jailbreak = "jailbreak"
+    MultiTurn = "multi_turn"
+    Crescendo = "crescendo"
     @classmethod
     def Compose(cls, items: List["AttackStrategy"]) -> List["AttackStrategy"]:
         for item in items:
             if not isinstance(item, cls):
                 raise ValueError("All items must be instances of AttackStrategy")
-        if len(items) > 2:
+        if len(items) > 2:
             raise ValueError("Composed strategies must have at most 2 items")
         return items

azure/ai/evaluation/red_team/_callback_chat_target.py CHANGED Viewed

@@ -49,17 +49,12 @@ class _CallbackChatTarget(PromptChatTarget):
         logger.info(f"Sending the following prompt to the prompt target: {request}")
         # response_context contains "messages", "stream", "session_state, "context"
-        response_context = await self._callback(messages=messages, stream=self._stream, session_state=None, context=None) # type: ignore
+        response_context = await self._callback(messages=messages, stream=self._stream, session_state=None, context=None)  # type: ignore
         response_text = response_context["messages"][-1]["content"]
-        response_entry = construct_response_from_request(
-            request=request, response_text_pieces=[response_text]
-        )
-        logger.info(
-            "Received the following response from the prompt target"
-            + f"{response_text}"
-        )
+        response_entry = construct_response_from_request(request=request, response_text_pieces=[response_text])
+        logger.info("Received the following response from the prompt target" + f"{response_text}")
         return response_entry
     def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:

azure/ai/evaluation/red_team/_default_converter.py CHANGED Viewed

@@ -18,4 +18,4 @@ class _DefaultConverter(PromptConverter):
         return input_type == "text"
     def output_supported(self, output_type: PromptDataType) -> bool:
-        return output_type == "text"
+        return output_type == "text"

azure-ai-evaluation 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

azure-ai-evaluation 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl