azure-ai-evaluation 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (85)
  1. azure/ai/evaluation/__init__.py +46 -12
  2. azure/ai/evaluation/_aoai/python_grader.py +84 -0
  3. azure/ai/evaluation/_aoai/score_model_grader.py +1 -0
  4. azure/ai/evaluation/_common/onedp/models/_models.py +5 -0
  5. azure/ai/evaluation/_common/rai_service.py +3 -3
  6. azure/ai/evaluation/_common/utils.py +74 -17
  7. azure/ai/evaluation/_converters/_ai_services.py +60 -10
  8. azure/ai/evaluation/_converters/_models.py +75 -26
  9. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +70 -22
  10. azure/ai/evaluation/_evaluate/_eval_run.py +14 -1
  11. azure/ai/evaluation/_evaluate/_evaluate.py +163 -44
  12. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +79 -33
  13. azure/ai/evaluation/_evaluate/_utils.py +5 -2
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  15. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +8 -1
  16. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
  17. azure/ai/evaluation/_evaluators/_common/_base_eval.py +143 -25
  18. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +7 -2
  19. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +19 -9
  20. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -5
  21. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -1
  22. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -1
  23. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +5 -2
  24. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -1
  25. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +3 -0
  26. azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -0
  27. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +1 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
  29. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  30. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +114 -4
  31. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +9 -3
  32. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -1
  33. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +8 -1
  34. azure/ai/evaluation/_evaluators/_qa/_qa.py +1 -1
  35. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +56 -3
  36. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
  37. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +11 -3
  38. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +3 -2
  39. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
  40. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +2 -1
  41. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -2
  42. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +24 -12
  43. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
  44. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +214 -187
  45. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +126 -31
  46. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +8 -1
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -1
  48. azure/ai/evaluation/_exceptions.py +1 -0
  49. azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
  50. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +115 -30
  51. azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
  52. azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
  53. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +28 -31
  54. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +2 -0
  55. azure/ai/evaluation/_version.py +1 -1
  56. azure/ai/evaluation/red_team/__init__.py +4 -3
  57. azure/ai/evaluation/red_team/_attack_objective_generator.py +17 -0
  58. azure/ai/evaluation/red_team/_callback_chat_target.py +14 -1
  59. azure/ai/evaluation/red_team/_evaluation_processor.py +376 -0
  60. azure/ai/evaluation/red_team/_mlflow_integration.py +322 -0
  61. azure/ai/evaluation/red_team/_orchestrator_manager.py +661 -0
  62. azure/ai/evaluation/red_team/_red_team.py +655 -2665
  63. azure/ai/evaluation/red_team/_red_team_result.py +6 -0
  64. azure/ai/evaluation/red_team/_result_processor.py +610 -0
  65. azure/ai/evaluation/red_team/_utils/__init__.py +34 -0
  66. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +11 -4
  67. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +6 -0
  68. azure/ai/evaluation/red_team/_utils/constants.py +0 -2
  69. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  70. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  71. azure/ai/evaluation/red_team/_utils/formatting_utils.py +115 -13
  72. azure/ai/evaluation/red_team/_utils/metric_mapping.py +24 -4
  73. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  74. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  75. azure/ai/evaluation/red_team/_utils/strategy_utils.py +17 -4
  76. azure/ai/evaluation/simulator/_adversarial_simulator.py +14 -2
  77. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +13 -1
  78. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +21 -7
  79. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +24 -5
  80. azure/ai/evaluation/simulator/_simulator.py +12 -0
  81. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/METADATA +63 -4
  82. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/RECORD +85 -76
  83. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/WHEEL +1 -1
  84. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info/licenses}/NOTICE.txt +0 -0
  85. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/top_level.txt +0 -0

azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py

@@ -11,11 +11,16 @@ import asyncio
  import re
  from typing import Dict, Optional, Any, Tuple, List
  from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
+ from azure.ai.evaluation.simulator._model_tools._generated_rai_client import (
+     GeneratedRAIClient,
+ )
  from pyrit.models import PromptRequestResponse, construct_response_from_request
  from pyrit.prompt_target import PromptChatTarget
- from .constants import USER_AGENT
- from .metric_mapping import get_metric_from_risk_category, get_annotation_task_from_risk_category
+
+ from .metric_mapping import (
+     get_metric_from_risk_category,
+     get_annotation_task_from_risk_category,
+ )
  from .._attack_objective_generator import RiskCategory


@@ -29,6 +34,7 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
          risk_category: RiskCategory,
          logger: Optional[logging.Logger] = None,
          evaluator_name: Optional[str] = None,
+         context: Optional[str] = None,
      ) -> None:
          """Initialize the RAIServiceEvalChatTarget.

@@ -43,6 +49,7 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
          self.evaluator_name = evaluator_name
          self.credential = credential
          self.azure_ai_project = azure_ai_project
+         self.context = context

      async def send_prompt_async(
          self, *, prompt_request: PromptRequestResponse, objective: str = ""
@@ -52,7 +59,7 @@ class RAIServiceEvalChatTarget(PromptChatTarget):

          thing_to_eval = prompt_request.request_pieces[0].to_dict()["original_value"]

-         thing_to_eval_qr = {"query": "query", "response": thing_to_eval}
+         thing_to_eval_qr = {"query": "query", "response": thing_to_eval, "context": self.context}

          metric_name = get_metric_from_risk_category(self.risk_category)
          annotation_task = get_annotation_task_from_risk_category(self.risk_category)

azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py

@@ -37,6 +37,7 @@ class AzureRAIServiceTrueFalseScorer(Scorer):
          model: Optional[str] = None,
          prompt_template_key: Optional[str] = None,
          logger: Optional[logging.Logger] = None,
+         context: Optional[str] = None,
      ) -> None:
          """Initialize the scorer.

@@ -52,6 +53,7 @@ class AzureRAIServiceTrueFalseScorer(Scorer):
          self._model = model
          self._prompt_template_key = prompt_template_key
          self.logger = logger
+         self.context = context

          # Use AzureRAIServiceTarget for scoring as well
          self._scoring_target_red_llm = RAIServiceEvalChatTarget(
@@ -59,6 +61,7 @@ class AzureRAIServiceTrueFalseScorer(Scorer):
              credential=credential,
              azure_ai_project=azure_ai_project,
              risk_category=risk_category,
+             context=context,
          )

      async def score_async(
@@ -78,6 +81,9 @@ class AzureRAIServiceTrueFalseScorer(Scorer):
          # Validate request_response
          self.validate(request_response, task=task)

+         if not self._scoring_target_red_llm.context:
+             self._scoring_target_red_llm.context = request_response.labels.get("context", "")
+
          unvalidated_score: UnvalidatedScore = await self._score_value_with_llm(
              prompt_target=self._scoring_target_red_llm,
              system_prompt="Please evaluate the following response.",  # TODO: What should this be?
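
The two hunks above thread an optional context string from the red-team scorer into the RAI evaluation payload. A rough sketch of the precedence the new code applies (illustrative only: resolve_context is a made-up helper, and labels stands in for PyRIT's request_response.labels):

from typing import Dict, Optional

def resolve_context(explicit_context: Optional[str], labels: Dict[str, str]) -> str:
    # Mirrors the new logic: a context passed at construction time wins,
    # otherwise fall back to the "context" label on the request, else "".
    return explicit_context or labels.get("context", "")

# The resolved value now rides along with the query/response pair sent for annotation:
payload = {"query": "query", "response": "<model output>", "context": resolve_context(None, {"context": "grounding docs"})}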

azure/ai/evaluation/red_team/_utils/constants.py

@@ -55,5 +55,3 @@ TASK_STATUS = {
      "TIMEOUT": "timeout",
      "INCOMPLETE": "incomplete",
  }
-
- USER_AGENT = "azure-ai-evaluation-redteam"

azure/ai/evaluation/red_team/_utils/exception_utils.py (new file)

@@ -0,0 +1,345 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ """
+ Exception handling utilities for Red Team Agent.
+
+ This module provides centralized exception handling, error categorization,
+ and error reporting utilities for red team operations.
+ """
+
+ import logging
+ import traceback
+ import asyncio
+ from typing import Optional, Any, Dict, Union
+ from enum import Enum
+
+
+ class ErrorCategory(Enum):
+     """Categories of errors that can occur during red team operations."""
+
+     NETWORK = "network"
+     AUTHENTICATION = "authentication"
+     CONFIGURATION = "configuration"
+     DATA_PROCESSING = "data_processing"
+     ORCHESTRATOR = "orchestrator"
+     EVALUATION = "evaluation"
+     FILE_IO = "file_io"
+     TIMEOUT = "timeout"
+     UNKNOWN = "unknown"
+
+
+ class ErrorSeverity(Enum):
+     """Severity levels for errors."""
+
+     LOW = "low"  # Warning level, operation can continue
+     MEDIUM = "medium"  # Error level, task failed but scan can continue
+     HIGH = "high"  # Critical error, scan should be aborted
+     FATAL = "fatal"  # Unrecoverable error
+
+
+ class RedTeamError(Exception):
+     """Base exception for Red Team operations."""
+
+     def __init__(
+         self,
+         message: str,
+         category: ErrorCategory = ErrorCategory.UNKNOWN,
+         severity: ErrorSeverity = ErrorSeverity.MEDIUM,
+         context: Optional[Dict[str, Any]] = None,
+         original_exception: Optional[Exception] = None,
+     ):
+         super().__init__(message)
+         self.message = message
+         self.category = category
+         self.severity = severity
+         self.context = context or {}
+         self.original_exception = original_exception
+
+
+ class ExceptionHandler:
+     """Centralized exception handling for Red Team operations."""
+
+     def __init__(self, logger: Optional[logging.Logger] = None):
+         """Initialize exception handler.
+
+         :param logger: Logger instance for error reporting
+         """
+         self.logger = logger or logging.getLogger(__name__)
+         self.error_counts: Dict[ErrorCategory, int] = {category: 0 for category in ErrorCategory}
+
+     def categorize_exception(self, exception: Exception) -> ErrorCategory:
+         """Categorize an exception based on its type and message.
+
+         :param exception: The exception to categorize
+         :return: The appropriate error category
+         """
+         import httpx
+         import httpcore
+
+         # Network-related errors
+         network_exceptions = (
+             httpx.ConnectTimeout,
+             httpx.ReadTimeout,
+             httpx.ConnectError,
+             httpx.HTTPError,
+             httpx.TimeoutException,
+             httpcore.ReadTimeout,
+             ConnectionError,
+             ConnectionRefusedError,
+             ConnectionResetError,
+         )
+
+         if isinstance(exception, network_exceptions):
+             return ErrorCategory.NETWORK
+
+         # Timeout errors (separate from network to handle asyncio.TimeoutError)
+         if isinstance(exception, (TimeoutError, asyncio.TimeoutError)):
+             return ErrorCategory.TIMEOUT
+
+         # File I/O errors
+         if isinstance(exception, (IOError, OSError, FileNotFoundError, PermissionError)):
+             return ErrorCategory.FILE_IO
+
+         # HTTP status code specific errors
+         if hasattr(exception, "response") and hasattr(exception.response, "status_code"):
+             status_code = exception.response.status_code
+             if 500 <= status_code < 600:
+                 return ErrorCategory.NETWORK
+             elif status_code == 401:
+                 return ErrorCategory.AUTHENTICATION
+             elif status_code == 403:
+                 return ErrorCategory.CONFIGURATION
+
+         # String-based categorization
+         message = str(exception).lower()
+
+         # Define keyword mappings for cleaner logic
+         keyword_mappings = {
+             ErrorCategory.AUTHENTICATION: ["authentication", "unauthorized"],
+             ErrorCategory.CONFIGURATION: ["configuration", "config"],
+             ErrorCategory.ORCHESTRATOR: ["orchestrator"],
+             ErrorCategory.EVALUATION: ["evaluation", "evaluate", "model_error"],
+             ErrorCategory.DATA_PROCESSING: ["data", "json"],
+         }
+
+         for category, keywords in keyword_mappings.items():
+             if any(keyword in message for keyword in keywords):
+                 return category
+
+         return ErrorCategory.UNKNOWN
+
+     def determine_severity(
+         self, exception: Exception, category: ErrorCategory, context: Optional[Dict[str, Any]] = None
+     ) -> ErrorSeverity:
+         """Determine the severity of an exception.
+
+         :param exception: The exception to evaluate
+         :param category: The error category
+         :param context: Additional context for severity determination
+         :return: The appropriate error severity
+         """
+         context = context or {}
+
+         # Critical system errors
+         if isinstance(exception, (MemoryError, SystemExit, KeyboardInterrupt)):
+             return ErrorSeverity.FATAL
+
+         # Authentication and configuration are typically high severity
+         if category in (ErrorCategory.AUTHENTICATION, ErrorCategory.CONFIGURATION):
+             return ErrorSeverity.HIGH
+
+         # File I/O errors can be high severity if they involve critical files
+         if category == ErrorCategory.FILE_IO:
+             if context.get("critical_file", False):
+                 return ErrorSeverity.HIGH
+             return ErrorSeverity.MEDIUM
+
+         # Network and timeout errors are usually medium severity (retryable)
+         if category in (ErrorCategory.NETWORK, ErrorCategory.TIMEOUT):
+             return ErrorSeverity.MEDIUM
+
+         # Task-specific errors are medium severity
+         if category in (ErrorCategory.ORCHESTRATOR, ErrorCategory.EVALUATION, ErrorCategory.DATA_PROCESSING):
+             return ErrorSeverity.MEDIUM
+
+         return ErrorSeverity.LOW
+
+     def handle_exception(
+         self,
+         exception: Exception,
+         context: Optional[Dict[str, Any]] = None,
+         task_name: Optional[str] = None,
+         reraise: bool = False,
+     ) -> RedTeamError:
+         """Handle an exception with proper categorization and logging.
+
+         :param exception: The exception to handle
+         :param context: Additional context information
+         :param task_name: Name of the task where the exception occurred
+         :param reraise: Whether to reraise the exception after handling
+         :return: A RedTeamError with categorized information
+         """
+         context = context or {}
+
+         # If it's already a RedTeamError, just log and return/reraise
+         if isinstance(exception, RedTeamError):
+             self._log_error(exception, task_name)
+             if reraise:
+                 raise exception
+             return exception
+
+         # Categorize the exception
+         category = self.categorize_exception(exception)
+         severity = self.determine_severity(exception, category, context)
+
+         # Update error counts
+         self.error_counts[category] += 1
+
+         # Create RedTeamError
+         message = f"{category.value.title()} error"
+         if task_name:
+             message += f" in {task_name}"
+         message += f": {str(exception)}"
+
+         red_team_error = RedTeamError(
+             message=message, category=category, severity=severity, context=context, original_exception=exception
+         )
+
+         # Log the error
+         self._log_error(red_team_error, task_name)
+
+         if reraise:
+             raise red_team_error
+
+         return red_team_error
+
+     def _log_error(self, error: RedTeamError, task_name: Optional[str] = None) -> None:
+         """Log an error with appropriate level based on severity.
+
+         :param error: The RedTeamError to log
+         :param task_name: Optional task name for context
+         """
+         # Determine log level based on severity
+         if error.severity == ErrorSeverity.FATAL:
+             log_level = logging.CRITICAL
+         elif error.severity == ErrorSeverity.HIGH:
+             log_level = logging.ERROR
+         elif error.severity == ErrorSeverity.MEDIUM:
+             log_level = logging.WARNING
+         else:
+             log_level = logging.INFO
+
+         # Create log message
+         message_parts = []
+         if task_name:
+             message_parts.append(f"[{task_name}]")
+         message_parts.append(f"[{error.category.value}]")
+         message_parts.append(f"[{error.severity.value}]")
+         message_parts.append(error.message)
+
+         log_message = " ".join(message_parts)
+
+         # Log with appropriate level
+         self.logger.log(log_level, log_message)
+
+         # Log additional context if available
+         if error.context:
+             self.logger.debug(f"Error context: {error.context}")
+
+         # Log original exception traceback for debugging
+         if error.original_exception and self.logger.isEnabledFor(logging.DEBUG):
+             self.logger.debug(f"Original exception traceback:\n{traceback.format_exc()}")
+
+     def should_abort_scan(self) -> bool:
+         """Determine if the scan should be aborted based on error patterns.
+
+         :return: True if the scan should be aborted
+         """
+         # Abort if we have too many high-severity errors
+         high_severity_categories = [ErrorCategory.AUTHENTICATION, ErrorCategory.CONFIGURATION]
+         high_severity_count = sum(self.error_counts[cat] for cat in high_severity_categories)
+
+         if high_severity_count > 2:
+             return True
+
+         # Abort if we have too many network errors (indicates systemic issue)
+         if self.error_counts[ErrorCategory.NETWORK] > 10:
+             return True
+
+         return False
+
+     def get_error_summary(self) -> Dict[str, Any]:
+         """Get a summary of all errors encountered.
+
+         :return: Dictionary containing error statistics
+         """
+         total_errors = sum(self.error_counts.values())
+
+         return {
+             "total_errors": total_errors,
+             "error_counts_by_category": dict(self.error_counts),
+             "most_common_category": max(self.error_counts, key=self.error_counts.get) if total_errors > 0 else None,
+             "should_abort": self.should_abort_scan(),
+         }
+
+     def log_error_summary(self) -> None:
+         """Log a summary of all errors encountered."""
+         summary = self.get_error_summary()
+
+         if summary["total_errors"] == 0:
+             self.logger.info("No errors encountered during operation")
+             return
+
+         self.logger.info(f"Error Summary: {summary['total_errors']} total errors")
+
+         for category, count in summary["error_counts_by_category"].items():
+             if count > 0:
+                 self.logger.info(f"  {category}: {count}")
+
+         if summary["most_common_category"]:
+             self.logger.info(f"Most common error type: {summary['most_common_category']}")
+
+
+ def create_exception_handler(logger: Optional[logging.Logger] = None) -> ExceptionHandler:
+     """Create an ExceptionHandler instance.
+
+     :param logger: Logger instance for error reporting
+     :return: Configured ExceptionHandler
+     """
+     return ExceptionHandler(logger=logger)
+
+
+ # Convenience context manager for handling exceptions
+ class exception_context:
+     """Context manager for handling exceptions in Red Team operations."""
+
+     def __init__(
+         self,
+         handler: ExceptionHandler,
+         task_name: str,
+         context: Optional[Dict[str, Any]] = None,
+         reraise_fatal: bool = True,
+     ):
+         self.handler = handler
+         self.task_name = task_name
+         self.context = context or {}
+         self.reraise_fatal = reraise_fatal
+         self.error: Optional[RedTeamError] = None
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         if exc_val is not None:
+             self.error = self.handler.handle_exception(
+                 exception=exc_val, context=self.context, task_name=self.task_name, reraise=False
+             )
+
+             # Reraise fatal errors unless specifically disabled
+             if self.reraise_fatal and self.error.severity == ErrorSeverity.FATAL:
+                 raise self.error
+
+             # Suppress the original exception (we've handled it)
+             return True
+         return False
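
The module is self-contained, so a minimal driving sketch might look like the following (illustrative only: the import path follows the file location in the list above, the logger and task names are made up, and categorize_exception imports httpx/httpcore, so those packages must be available):

import logging
from azure.ai.evaluation.red_team._utils.exception_utils import ExceptionHandler, exception_context

logger = logging.getLogger("red_team_scan")
handler = ExceptionHandler(logger=logger)

# Non-fatal failures are categorized, counted, logged, and suppressed by the context manager.
with exception_context(handler, task_name="fetch_objectives") as ctx:
    raise ConnectionError("RAI service unreachable")

print(ctx.error.category)             # ErrorCategory.NETWORK
print(ctx.error.severity)             # ErrorSeverity.MEDIUM
print(handler.should_abort_scan())    # False until the abort thresholds are crossed
handler.log_error_summary()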

azure/ai/evaluation/red_team/_utils/file_utils.py (new file)

@@ -0,0 +1,266 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ """
+ File operation utilities for Red Team Agent.
+
+ This module provides centralized file handling, path operations, and
+ data serialization utilities used across the red team components.
+ """
+
+ import json
+ import os
+ import uuid
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Union
+
+ # Try to import DefaultOpenEncoding, fallback to standard encoding
+ try:
+     from azure.ai.evaluation._common._utils import DefaultOpenEncoding
+
+     DEFAULT_ENCODING = DefaultOpenEncoding.WRITE
+ except ImportError:
+     DEFAULT_ENCODING = "utf-8"
+
+
+ class FileManager:
+     """Centralized file operations manager for Red Team operations."""
+
+     def __init__(self, base_output_dir: Optional[str] = None, logger=None):
+         """Initialize file manager.
+
+         :param base_output_dir: Base directory for all file operations
+         :param logger: Logger instance for file operations
+         """
+         self.base_output_dir = base_output_dir or "."
+         self.logger = logger
+
+     def ensure_directory(self, path: Union[str, os.PathLike]) -> str:
+         """Ensure a directory exists, creating it if necessary.
+
+         :param path: Path to the directory
+         :return: Absolute path to the directory
+         """
+         abs_path = os.path.abspath(path)
+         os.makedirs(abs_path, exist_ok=True)
+         return abs_path
+
+     def generate_unique_filename(
+         self, prefix: str = "", suffix: str = "", extension: str = "", use_timestamp: bool = False
+     ) -> str:
+         """Generate a unique filename.
+
+         :param prefix: Prefix for the filename
+         :param suffix: Suffix for the filename
+         :param extension: File extension (with or without dot)
+         :param use_timestamp: Whether to include timestamp in filename
+         :return: Unique filename
+         """
+         parts = []
+
+         if prefix:
+             parts.append(prefix)
+
+         if use_timestamp:
+             parts.append(datetime.now().strftime("%Y%m%d_%H%M%S"))
+
+         # Always include UUID for uniqueness
+         parts.append(str(uuid.uuid4()))
+
+         if suffix:
+             parts.append(suffix)
+
+         filename = "_".join(parts)
+
+         if extension:
+             if not extension.startswith("."):
+                 extension = "." + extension
+             filename += extension
+
+         return filename
+
+     def get_scan_output_path(self, scan_id: str, filename: str = "") -> str:
+         """Get path for scan output files.
+
+         :param scan_id: Unique scan identifier
+         :param filename: Optional filename to append
+         :return: Full path for scan output
+         """
+         # Create scan directory based on DEBUG environment
+         is_debug = os.environ.get("DEBUG", "").lower() in ("true", "1", "yes", "y")
+         folder_prefix = "" if is_debug else "."
+
+         scan_dir = os.path.join(self.base_output_dir, f"{folder_prefix}{scan_id}")
+         self.ensure_directory(scan_dir)
+
+         # Create .gitignore in scan directory if not debug mode
+         if not is_debug:
+             gitignore_path = os.path.join(scan_dir, ".gitignore")
+             if not os.path.exists(gitignore_path):
+                 with open(gitignore_path, "w", encoding="utf-8") as f:
+                     f.write("*\n")
+
+         if filename:
+             return os.path.join(scan_dir, filename)
+         return scan_dir
+
+     def write_json(self, data: Any, filepath: Union[str, os.PathLike], indent: int = 2, ensure_dir: bool = True) -> str:
+         """Write data to JSON file.
+
+         :param data: Data to write
+         :param filepath: Path to write the file
+         :param indent: JSON indentation
+         :param ensure_dir: Whether to ensure directory exists
+         :return: Absolute path of written file
+         """
+         abs_path = os.path.abspath(filepath)
+
+         if ensure_dir:
+             self.ensure_directory(os.path.dirname(abs_path))
+
+         with open(abs_path, "w", encoding=DEFAULT_ENCODING) as f:
+             json.dump(data, f, indent=indent)
+
+         if self.logger:
+             self.logger.debug(f"Successfully wrote JSON to {abs_path}")
+
+         return abs_path
+
+     def read_json(self, filepath: Union[str, os.PathLike]) -> Any:
+         """Read data from JSON file.
+
+         :param filepath: Path to the JSON file
+         :return: Parsed JSON data
+         """
+         abs_path = os.path.abspath(filepath)
+
+         try:
+             with open(abs_path, "r", encoding="utf-8") as f:
+                 data = json.load(f)
+
+             if self.logger:
+                 self.logger.debug(f"Successfully read JSON from {abs_path}")
+
+             return data
+         except Exception as e:
+             if self.logger:
+                 self.logger.error(f"Failed to read JSON from {abs_path}: {str(e)}")
+             raise
+
+     def read_jsonl(self, filepath: Union[str, os.PathLike]) -> List[Dict]:
+         """Read data from JSONL file.
+
+         :param filepath: Path to the JSONL file
+         :return: List of parsed JSON objects
+         """
+         abs_path = os.path.abspath(filepath)
+         data = []
+
+         try:
+             with open(abs_path, "r", encoding="utf-8") as f:
+                 for line_num, line in enumerate(f, 1):
+                     line = line.strip()
+                     if line:
+                         try:
+                             data.append(json.loads(line))
+                         except json.JSONDecodeError as e:
+                             if self.logger:
+                                 self.logger.warning(f"Skipping invalid JSON line {line_num} in {abs_path}: {str(e)}")
+
+             if self.logger:
+                 self.logger.debug(f"Successfully read {len(data)} records from JSONL {abs_path}")
+
+             return data
+         except Exception as e:
+             if self.logger:
+                 self.logger.error(f"Failed to read JSONL from {abs_path}: {str(e)}")
+             raise
+
+     def write_jsonl(self, data: List[Dict], filepath: Union[str, os.PathLike], ensure_dir: bool = True) -> str:
+         """Write data to JSONL file.
+
+         :param data: List of dictionaries to write
+         :param filepath: Path to write the file
+         :param ensure_dir: Whether to ensure directory exists
+         :return: Absolute path of written file
+         """
+         abs_path = os.path.abspath(filepath)
+
+         if ensure_dir:
+             self.ensure_directory(os.path.dirname(abs_path))
+
+         with open(abs_path, "w", encoding="utf-8") as f:
+             for item in data:
+                 f.write(json.dumps(item) + "\n")
+
+         if self.logger:
+             self.logger.debug(f"Successfully wrote {len(data)} records to JSONL {abs_path}")
+
+         return abs_path
+
+     def safe_filename(self, name: str, max_length: int = 255) -> str:
+         """Create a safe filename from a string.
+
+         :param name: Original name
+         :param max_length: Maximum filename length
+         :return: Safe filename
+         """
+         # Replace invalid characters
+         invalid_chars = '<>:"/\\|?*'
+         safe_name = "".join(c if c not in invalid_chars else "_" for c in name)
+
+         # Replace spaces with underscores
+         safe_name = safe_name.replace(" ", "_")
+
+         # Truncate if too long
+         if len(safe_name) > max_length:
+             safe_name = safe_name[: max_length - 4] + "..."
+
+         return safe_name
+
+     def get_file_size(self, filepath: Union[str, os.PathLike]) -> int:
+         """Get file size in bytes.
+
+         :param filepath: Path to the file
+         :return: File size in bytes
+         """
+         return os.path.getsize(filepath)
+
+     def file_exists(self, filepath: Union[str, os.PathLike]) -> bool:
+         """Check if file exists.
+
+         :param filepath: Path to check
+         :return: True if file exists
+         """
+         return os.path.isfile(filepath)
+
+     def cleanup_file(self, filepath: Union[str, os.PathLike], ignore_errors: bool = True) -> bool:
+         """Delete a file if it exists.
+
+         :param filepath: Path to the file to delete
+         :param ignore_errors: Whether to ignore deletion errors
+         :return: True if file was deleted or didn't exist
+         """
+         try:
+             if self.file_exists(filepath):
+                 os.remove(filepath)
+                 if self.logger:
+                     self.logger.debug(f"Deleted file: {filepath}")
+             return True
+         except Exception as e:
+             if not ignore_errors:
+                 raise
+             if self.logger:
+                 self.logger.warning(f"Failed to delete file {filepath}: {str(e)}")
+             return False
+
+
+ def create_file_manager(base_output_dir: Optional[str] = None, logger=None) -> FileManager:
+     """Create a FileManager instance.
+
+     :param base_output_dir: Base directory for file operations
+     :param logger: Logger instance
+     :return: Configured FileManager
+     """
+     return FileManager(base_output_dir=base_output_dir, logger=logger)
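
As with the exception utilities, a short usage sketch (the output directory, scan id, and record contents below are illustrative):

from azure.ai.evaluation.red_team._utils.file_utils import create_file_manager

fm = create_file_manager(base_output_dir="./redteam_runs")

# Unless DEBUG is set, scan output lands in a dot-prefixed folder that gets its own .gitignore.
results_path = fm.get_scan_output_path("scan_001", filename="results.json")
fm.write_json({"attack_success_rate": 0.0}, results_path)

rows_path = fm.get_scan_output_path("scan_001", filename=fm.generate_unique_filename(prefix="rows", extension="jsonl"))
fm.write_jsonl([{"prompt": "hello", "label": "benign"}], rows_path)
print(fm.read_jsonl(rows_path))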