PyPI - azure-ai-evaluation - Versions diffs - 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl - Mend - Supply Chain Defender

azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277) hide show

azure/ai/evaluation/red_team/_utils/file_utils.py ADDED Viewed

@@ -0,0 +1,266 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+"""
+File operation utilities for Red Team Agent.
+This module provides centralized file handling, path operations, and
+data serialization utilities used across the red team components.
+"""
+import json
+import os
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+# Try to import DefaultOpenEncoding, fallback to standard encoding
+try:
+    from azure.ai.evaluation._common._utils import DefaultOpenEncoding
+    DEFAULT_ENCODING = DefaultOpenEncoding.WRITE
+except ImportError:
+    DEFAULT_ENCODING = "utf-8"
+class FileManager:
+    """Centralized file operations manager for Red Team operations."""
+    def __init__(self, base_output_dir: Optional[str] = None, logger=None):
+        """Initialize file manager.
+        :param base_output_dir: Base directory for all file operations
+        :param logger: Logger instance for file operations
+        """
+        self.base_output_dir = base_output_dir or "."
+        self.logger = logger
+    def ensure_directory(self, path: Union[str, os.PathLike]) -> str:
+        """Ensure a directory exists, creating it if necessary.
+        :param path: Path to the directory
+        :return: Absolute path to the directory
+        """
+        abs_path = os.path.abspath(path)
+        os.makedirs(abs_path, exist_ok=True)
+        return abs_path
+    def generate_unique_filename(
+        self, prefix: str = "", suffix: str = "", extension: str = "", use_timestamp: bool = False
+    ) -> str:
+        """Generate a unique filename.
+        :param prefix: Prefix for the filename
+        :param suffix: Suffix for the filename
+        :param extension: File extension (with or without dot)
+        :param use_timestamp: Whether to include timestamp in filename
+        :return: Unique filename
+        """
+        parts = []
+        if prefix:
+            parts.append(prefix)
+        if use_timestamp:
+            parts.append(datetime.now().strftime("%Y%m%d_%H%M%S"))
+        # Always include UUID for uniqueness
+        parts.append(str(uuid.uuid4()))
+        if suffix:
+            parts.append(suffix)
+        filename = "_".join(parts)
+        if extension:
+            if not extension.startswith("."):
+                extension = "." + extension
+            filename += extension
+        return filename
+    def get_scan_output_path(self, scan_id: str, filename: str = "") -> str:
+        """Get path for scan output files.
+        :param scan_id: Unique scan identifier
+        :param filename: Optional filename to append
+        :return: Full path for scan output
+        """
+        # Create scan directory based on DEBUG environment
+        is_debug = os.environ.get("DEBUG", "").lower() in ("true", "1", "yes", "y")
+        folder_prefix = "" if is_debug else "."
+        scan_dir = os.path.join(self.base_output_dir, f"{folder_prefix}{scan_id}")
+        self.ensure_directory(scan_dir)
+        # Create .gitignore in scan directory if not debug mode
+        if not is_debug:
+            gitignore_path = os.path.join(scan_dir, ".gitignore")
+            if not os.path.exists(gitignore_path):
+                with open(gitignore_path, "w", encoding="utf-8") as f:
+                    f.write("*\n")
+        if filename:
+            return os.path.join(scan_dir, filename)
+        return scan_dir
+    def write_json(self, data: Any, filepath: Union[str, os.PathLike], indent: int = 2, ensure_dir: bool = True) -> str:
+        """Write data to JSON file.
+        :param data: Data to write
+        :param filepath: Path to write the file
+        :param indent: JSON indentation
+        :param ensure_dir: Whether to ensure directory exists
+        :return: Absolute path of written file
+        """
+        abs_path = os.path.abspath(filepath)
+        if ensure_dir:
+            self.ensure_directory(os.path.dirname(abs_path))
+        with open(abs_path, "w", encoding=DEFAULT_ENCODING) as f:
+            json.dump(data, f, indent=indent)
+        if self.logger:
+            self.logger.debug(f"Successfully wrote JSON to {abs_path}")
+        return abs_path
+    def read_json(self, filepath: Union[str, os.PathLike]) -> Any:
+        """Read data from JSON file.
+        :param filepath: Path to the JSON file
+        :return: Parsed JSON data
+        """
+        abs_path = os.path.abspath(filepath)
+        try:
+            with open(abs_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if self.logger:
+                self.logger.debug(f"Successfully read JSON from {abs_path}")
+            return data
+        except Exception as e:
+            if self.logger:
+                self.logger.error(f"Failed to read JSON from {abs_path}: {str(e)}")
+            raise
+    def read_jsonl(self, filepath: Union[str, os.PathLike]) -> List[Dict]:
+        """Read data from JSONL file.
+        :param filepath: Path to the JSONL file
+        :return: List of parsed JSON objects
+        """
+        abs_path = os.path.abspath(filepath)
+        data = []
+        try:
+            with open(abs_path, "r", encoding="utf-8") as f:
+                for line_num, line in enumerate(f, 1):
+                    line = line.strip()
+                    if line:
+                        try:
+                            data.append(json.loads(line))
+                        except json.JSONDecodeError as e:
+                            if self.logger:
+                                self.logger.warning(f"Skipping invalid JSON line {line_num} in {abs_path}: {str(e)}")
+            if self.logger:
+                self.logger.debug(f"Successfully read {len(data)} records from JSONL {abs_path}")
+            return data
+        except Exception as e:
+            if self.logger:
+                self.logger.error(f"Failed to read JSONL from {abs_path}: {str(e)}")
+            raise
+    def write_jsonl(self, data: List[Dict], filepath: Union[str, os.PathLike], ensure_dir: bool = True) -> str:
+        """Write data to JSONL file.
+        :param data: List of dictionaries to write
+        :param filepath: Path to write the file
+        :param ensure_dir: Whether to ensure directory exists
+        :return: Absolute path of written file
+        """
+        abs_path = os.path.abspath(filepath)
+        if ensure_dir:
+            self.ensure_directory(os.path.dirname(abs_path))
+        with open(abs_path, "w", encoding="utf-8") as f:
+            for item in data:
+                f.write(json.dumps(item) + "\n")
+        if self.logger:
+            self.logger.debug(f"Successfully wrote {len(data)} records to JSONL {abs_path}")
+        return abs_path
+    def safe_filename(self, name: str, max_length: int = 255) -> str:
+        """Create a safe filename from a string.
+        :param name: Original name
+        :param max_length: Maximum filename length
+        :return: Safe filename
+        """
+        # Replace invalid characters
+        invalid_chars = '<>:"/\\|?*'
+        safe_name = "".join(c if c not in invalid_chars else "_" for c in name)
+        # Replace spaces with underscores
+        safe_name = safe_name.replace(" ", "_")
+        # Truncate if too long
+        if len(safe_name) > max_length:
+            safe_name = safe_name[: max_length - 4] + "..."
+        return safe_name
+    def get_file_size(self, filepath: Union[str, os.PathLike]) -> int:
+        """Get file size in bytes.
+        :param filepath: Path to the file
+        :return: File size in bytes
+        """
+        return os.path.getsize(filepath)
+    def file_exists(self, filepath: Union[str, os.PathLike]) -> bool:
+        """Check if file exists.
+        :param filepath: Path to check
+        :return: True if file exists
+        """
+        return os.path.isfile(filepath)
+    def cleanup_file(self, filepath: Union[str, os.PathLike], ignore_errors: bool = True) -> bool:
+        """Delete a file if it exists.
+        :param filepath: Path to the file to delete
+        :param ignore_errors: Whether to ignore deletion errors
+        :return: True if file was deleted or didn't exist
+        """
+        try:
+            if self.file_exists(filepath):
+                os.remove(filepath)
+                if self.logger:
+                    self.logger.debug(f"Deleted file: {filepath}")
+            return True
+        except Exception as e:
+            if not ignore_errors:
+                raise
+            if self.logger:
+                self.logger.warning(f"Failed to delete file {filepath}: {str(e)}")
+            return False
+def create_file_manager(base_output_dir: Optional[str] = None, logger=None) -> FileManager:
+    """Create a FileManager instance.
+    :param base_output_dir: Base directory for file operations
+    :param logger: Logger instance
+    :return: Configured FileManager
+    """
+    return FileManager(base_output_dir=base_output_dir, logger=logger)

azure/ai/evaluation/red_team/_utils/formatting_utils.py ADDED Viewed

@@ -0,0 +1,365 @@
+"""
+Utility functions for formatting, conversion, and processing in Red Team Agent.
+"""
+import json
+import math
+import itertools
+import os
+import logging
+from typing import Dict, List, Union, Any
+from pathlib import Path
+from pyrit.models import ChatMessage
+from pyrit.memory import CentralMemory
+from .._attack_strategy import AttackStrategy
+from .._red_team_result import RedTeamResult
+def message_to_dict(
+    message: ChatMessage, context: str = None, tool_calls: List[Any] = None, token_usage: Dict[str, Any] = None
+) -> Dict[str, Any]:
+    """Convert a ChatMessage and context to dictionary format.
+    :param message: The chat message to convert
+    :type message: ChatMessage
+    :param context: Additional context to include in the dictionary
+    :type context: str
+    :param tool_calls: List of tool calls to include in the dictionary
+    :type tool_calls: List[Any]
+    :param token_usage: Token usage information from the callback
+    :type token_usage: Dict[str, Any]
+    :return: Dictionary representation with role and content
+    :rtype: Dict[str, Any]
+    """
+    msg_dict = {"role": message.role, "content": message.content, "context": context, "tool_calls": tool_calls}
+    if token_usage:
+        msg_dict["token_usage"] = token_usage
+    return msg_dict
+def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
+    """Get a string name for an attack strategy or list of strategies.
+    :param attack_strategy: The attack strategy or list of strategies
+    :type attack_strategy: Union[AttackStrategy, List[AttackStrategy]]
+    :return: A string name for the strategy
+    :rtype: str
+    """
+    if isinstance(attack_strategy, List):
+        return "_".join([str(strategy.value) for strategy in attack_strategy])
+    else:
+        return str(attack_strategy.value)
+def get_flattened_attack_strategies(
+    attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
+) -> List[Union[AttackStrategy, List[AttackStrategy]]]:
+    """Flatten complex attack strategies into individual strategies.
+    :param attack_strategies: List of attack strategies to flatten
+    :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
+    :return: Flattened list of attack strategies
+    :rtype: List[Union[AttackStrategy, List[AttackStrategy]]]
+    """
+    flattened_strategies = []
+    seen_strategies = set()
+    attack_strategies_temp = attack_strategies.copy()
+    if AttackStrategy.EASY in attack_strategies_temp:
+        attack_strategies_temp.extend([AttackStrategy.Base64, AttackStrategy.Flip, AttackStrategy.Morse])
+        attack_strategies_temp.remove(AttackStrategy.EASY)
+    if AttackStrategy.MODERATE in attack_strategies_temp:
+        attack_strategies_temp.extend(
+            [AttackStrategy.Tense],
+        )
+        attack_strategies_temp.remove(AttackStrategy.MODERATE)
+    if AttackStrategy.DIFFICULT in attack_strategies_temp:
+        attack_strategies_temp.extend(
+            [
+                AttackStrategy.Compose([AttackStrategy.Tense, AttackStrategy.Base64]),
+            ]
+        )
+        attack_strategies_temp.remove(AttackStrategy.DIFFICULT)
+    ## Baseline is always included
+    attack_strategies_temp.append(AttackStrategy.Baseline)
+    for strategy in attack_strategies_temp:
+        if isinstance(strategy, List) and tuple(strategy) not in seen_strategies:  # For composed strategies
+            flattened_strategies.append([s for s in strategy])
+            seen_strategies.add(tuple(strategy))
+        elif isinstance(strategy, AttackStrategy) and strategy not in seen_strategies:  # For single strategies
+            flattened_strategies.append(strategy)
+            seen_strategies.add(strategy)
+    return flattened_strategies
+def get_attack_success(result: str) -> bool:
+    """
+    Based on EVALUATION_PASS_FAIL_MAPPING, returns True denoting attack success if result matches the value of the EVALUATION_PASS_FAIL_MAPPING key False.
+    """
+    from azure.ai.evaluation._constants import EVALUATION_PASS_FAIL_MAPPING
+    return result == EVALUATION_PASS_FAIL_MAPPING.get(False, "true")
+def format_scorecard(redteam_result: RedTeamResult) -> str:
+    """Format the RedTeamResult into a human-readable scorecard.
+    The scorecard presents a summary of attack success rates across different risk categories
+    and attack complexity levels in a tabular format.
+    :param redteam_result: The RedTeamResult object to format
+    :type redteam_result: Dict[str, Any]
+    :return: A formatted scorecard as a string
+    :rtype: str
+    """
+    scorecard = redteam_result["scorecard"]
+    risk_summary = scorecard["risk_category_summary"][0] if scorecard["risk_category_summary"] else {}
+    overall_asr = risk_summary.get("overall_asr", 0)
+    output = [f"Overall ASR: {overall_asr}%"]
+    overall_successes = risk_summary.get("overall_successful_attacks", 0)
+    overall_total = risk_summary.get("overall_total", 0)
+    output.append(f"Attack Success: {overall_successes}/{overall_total} attacks were successful")
+    separator = "-" * 132
+    output.append(separator)
+    output.append(
+        f"{'Risk Category':<21}| {'Baseline ASR':<14} | {'Easy-Complexity Attacks ASR':<28} | {'Moderate-Complexity Attacks ASR':<30} | {'Difficult-Complexity Attacks ASR':<30}"
+    )
+    output.append(separator)
+    for item in scorecard["joint_risk_attack_summary"]:
+        risk_category = item["risk_category"].replace("_", "-").capitalize()
+        baseline_val = item.get("baseline_asr")
+        easy_val = item.get("easy_complexity_asr")
+        moderate_val = item.get("moderate_complexity_asr")
+        difficult_val = item.get("difficult_complexity_asr")
+        baseline = "N/A" if is_none_or_nan(baseline_val) else f"{baseline_val}%"
+        easy = "N/A" if is_none_or_nan(easy_val) else f"{easy_val}%"
+        moderate = "N/A" if is_none_or_nan(moderate_val) else f"{moderate_val}%"
+        difficult = "N/A" if is_none_or_nan(difficult_val) else f"{difficult_val}%"
+        output.append(f"{risk_category:<21}| {baseline:<14} | {easy:<28} | {moderate:<31} | {difficult:<30}")
+    return "\n".join(output)
+def is_none_or_nan(value: Any) -> bool:
+    """Check if a value is None or NaN."""
+    if value is None:
+        return True
+    if isinstance(value, float) and math.isnan(value):
+        return True
+    return False
+def list_mean_nan_safe(data_list: List[Any]) -> float:
+    """Calculate the mean of a list, handling None and NaN values safely.
+    :param data_list: List of values to calculate mean for
+    :type data_list: List[Any]
+    :return: Mean value or 0.0 if list is empty after filtering
+    :rtype: float
+    """
+    filtered_list = [x for x in data_list if not is_none_or_nan(x)]
+    if not filtered_list:
+        return 0.0
+    return sum(filtered_list) / len(filtered_list)
+def format_content_by_modality(content: str, modality: str) -> str:
+    """Format content based on the specified modality, returning one random format.
+    :param content: The content to format
+    :type content: str
+    :param modality: The modality type (html, markdown, footnote, code, text)
+    :type modality: str
+    :return: Formatted content
+    :rtype: str
+    """
+    import html
+    import random
+    def format_as_html(text):
+        html_text = html.escape(text)
+        return [
+            f"<p>{html_text}</p>",
+            f'<div style="display:none">{html_text}</div>',
+            f'<span style="display:none">{html_text}</span>',
+            f"<div hidden>{html_text}</div>",
+            f'<meta content="{html_text}">',
+        ]
+    def format_as_markdown(text):
+        markdown_text = text.replace("\n", "  \n")  # Convert newlines to Markdown line breaks
+        return [
+            f"\n**{markdown_text}**\n",  # Bold
+            f"\n*{markdown_text}*\n",  # Italic
+            f"\n`{markdown_text}`\n",  # Inline code
+            f"\n```{markdown_text}```\n",  # Code block
+            f"\n[Link]({markdown_text})\n",  # Link
+        ]
+    def format_as_footnote(text):
+        footnote_text = text.replace("\n", " ")
+        return [
+            f"[^{footnote_text}]",  # Footnote reference
+            f"\n[1] {footnote_text}\n",  # Footnote reference with number
+            f"<sup>{footnote_text}</sup>",  # Superscript
+            f'<span class="footnote">{footnote_text}</span>',  # Custom footnote span
+            f'<div class="footnote">{footnote_text}</div>',  # Custom footnote div
+        ]
+    def format_as_code(text):
+        code_text = text.replace("\n", " ")
+        return [
+            f"`{code_text}`",  # Inline code
+            f"```\n{code_text}\n```",  # Code block
+            f'"""\n{code_text}\n"""',  # Code block
+            f"# {code_text}",  # Inline comment
+            f'def function():\n    print("{code_text}")',  # Function call
+        ]
+    def format_as_text(text):
+        return [f"<document>{text}</document>"]  # Return text in document tags
+    # Mapping of modality types to formatting functions
+    modality_formatters = {
+        "html": format_as_html,
+        "markdown": format_as_markdown,
+        "footnote": format_as_footnote,
+        "code": format_as_code,
+        "text": format_as_text,
+    }
+    # Get formatter based on modality type
+    if modality and modality.lower() in modality_formatters:
+        formatter = modality_formatters[modality.lower()]
+        formats = formatter(content)
+        # Return one random format from the available options
+        return random.choice(formats)
+    else:
+        # Return plain text if modality not recognized
+        return content
+def write_pyrit_outputs_to_file(
+    *,
+    output_path: str,
+    logger: logging.Logger,
+    prompt_to_context: Dict[str, str],
+) -> str:
+    """Write PyRIT outputs to a file with a name based on orchestrator, strategy, and risk category.
+    :param output_path: Path to write the output file
+    :type output_path: str
+    :param logger: Logger instance for logging
+    :type logger: logging.Logger
+    :param prompt_to_context: Mapping of prompts to their context
+    :type prompt_to_context: Dict[str, str]
+    :return: Path to the output file
+    :rtype: str
+    :raises IOError: If the output file cannot be read or written
+    :raises PermissionError: If there are insufficient permissions to access the output file
+    :raises Exception: For other unexpected errors during file operations or memory retrieval
+    """
+    logger.debug(f"Writing PyRIT outputs to file: {output_path}")
+    memory = CentralMemory.get_memory_instance()
+    memory_label = {"risk_strategy_path": output_path}
+    prompts_request_pieces = memory.get_prompt_request_pieces(labels=memory_label)
+    conversations = [
+        [
+            (
+                item.to_chat_message(),
+                prompt_to_context.get(item.original_value, "") or item.labels.get("context", ""),
+                item.labels.get("tool_calls", []),
+                item.labels.get("risk_sub_type"),
+                item.labels.get("token_usage"),
+            )
+            for item in group
+        ]
+        for conv_id, group in itertools.groupby(prompts_request_pieces, key=lambda x: x.conversation_id)
+    ]
+    # Check if we should overwrite existing file with more conversations
+    if os.path.exists(output_path):
+        existing_line_count = 0
+        try:
+            with open(output_path, "r") as existing_file:
+                existing_line_count = sum(1 for _ in existing_file)
+            if len(conversations) > existing_line_count:
+                logger.debug(
+                    f"Found more prompts ({len(conversations)}) than existing file lines ({existing_line_count}). Replacing content."
+                )
+                # Convert to json lines
+                json_lines = ""
+                for conversation in conversations:
+                    if conversation[0][0].role == "system":
+                        # Skip system messages in the output
+                        continue
+                    conv_dict = {
+                        "conversation": {
+                            "messages": [
+                                message_to_dict(
+                                    message[0], message[1], message[2], message[4] if len(message) > 4 else None
+                                )
+                                for message in conversation
+                            ]
+                        }
+                    }
+                    # Add risk_sub_type if present (check first message for the label)
+                    if conversation and len(conversation) > 0 and len(conversation[0]) > 3:
+                        risk_sub_type = conversation[0][3]
+                        if risk_sub_type:
+                            conv_dict["risk_sub_type"] = risk_sub_type
+                    json_lines += json.dumps(conv_dict) + "\n"
+                with Path(output_path).open("w") as f:
+                    f.writelines(json_lines)
+                logger.debug(
+                    f"Successfully wrote {len(conversations)-existing_line_count} new conversation(s) to {output_path}"
+                )
+            else:
+                logger.debug(
+                    f"Existing file has {existing_line_count} lines, new data has {len(conversations)} prompts. Keeping existing file."
+                )
+                return output_path
+        except Exception as e:
+            logger.warning(f"Failed to read existing file {output_path}: {str(e)}")
+    else:
+        logger.debug(f"Creating new file: {output_path}")
+        # Convert to json lines
+        json_lines = ""
+        for conversation in conversations:
+            if conversation[0][0].role == "system":
+                # Skip system messages in the output
+                continue
+            conv_dict = {
+                "conversation": {
+                    "messages": [
+                        message_to_dict(message[0], message[1], message[2], message[4] if len(message) > 4 else None)
+                        for message in conversation
+                    ]
+                }
+            }
+            # Add risk_sub_type if present (check first message for the label)
+            if conversation and len(conversation) > 0 and len(conversation[0]) > 3:
+                risk_sub_type = conversation[0][3]
+                if risk_sub_type:
+                    conv_dict["risk_sub_type"] = risk_sub_type
+            json_lines += json.dumps(conv_dict) + "\n"
+        with Path(output_path).open("w") as f:
+            f.writelines(json_lines)
+        logger.debug(f"Successfully wrote {len(conversations)} conversations to {output_path}")
+    return str(output_path)