PyPI - azure-ai-evaluation - Versions diffs - 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl - Mend

azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299) hide show

azure/ai/evaluation/red_team/_utils/logging_utils.py ADDED Viewed

@@ -0,0 +1,139 @@
+"""
+Logging utilities for Red Team Agent.
+This module provides consistent logging configuration and helper functions
+for logging throughout the Red Team Agent.
+"""
+import logging
+import os
+from datetime import datetime
+def setup_logger(logger_name="RedTeamLogger", output_dir=None):
+    """Configure and return a logger instance for the Red Team Agent.
+    Creates two handlers:
+    - File handler: Captures all logs at DEBUG level in ``redteam.log``
+    - Console handler: Shows WARNING and above for better visibility
+    Handlers already attached to the named logger are removed and closed first,
+    so calling this function repeatedly neither duplicates output nor leaks
+    open log files.
+    :param logger_name: Name to use for the logger
+    :type logger_name: str
+    :param output_dir: Directory to store log files in. If None, logs are stored in current directory.
+    :type output_dir: Optional[str]
+    :return: The configured logger instance
+    :rtype: logging.Logger
+    """
+    # Format matches what's expected in test_setup_logger
+    log_filename = "redteam.log"
+    # If output directory is specified, create path with that directory
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        log_filepath = os.path.join(output_dir, log_filename)
+    else:
+        log_filepath = log_filename
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(logging.DEBUG)
+    # Clear any existing handlers (in case logger was already configured).
+    # Iterate over a copy: removeHandler() mutates logger.handlers, and removing
+    # from the live list while iterating it skips every other handler.
+    for handler in list(logger.handlers):
+        logger.removeHandler(handler)
+        # Release the underlying stream/file so the old log file is not left open.
+        handler.close()
+    # File handler - captures all logs at DEBUG level with detailed formatting.
+    # UTF-8 is set explicitly so non-ASCII log text does not depend on the platform default encoding.
+    file_handler = logging.FileHandler(log_filepath, encoding="utf-8")
+    file_handler.setLevel(logging.DEBUG)
+    file_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+    file_handler.setFormatter(file_formatter)
+    logger.addHandler(file_handler)
+    # Console handler - shows only WARNING and above to reduce output but keep important messages
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.WARNING)
+    console_formatter = logging.Formatter("%(levelname)s: %(message)s")
+    console_handler.setFormatter(console_formatter)
+    logger.addHandler(console_handler)
+    # Don't propagate to root logger to avoid duplicate logs
+    logger.propagate = False
+    return logger
+def log_section_header(logger, section_title):
+    """Emit a banner-style section heading at DEBUG level.
+    The upper-cased title is written between two 80-character ``=`` rules,
+    producing three DEBUG records in total.
+    :param logger: The logger instance that receives the records
+    :type logger: logging.Logger
+    :param section_title: The title of the section
+    :type section_title: str
+    """
+    rule = "=" * 80
+    logger.debug(rule)
+    heading = section_title.upper()
+    logger.debug(heading)
+    logger.debug(rule)
+def log_subsection_header(logger, section_title):
+    """Emit a subsection heading at DEBUG level.
+    The title is written unchanged between two 60-character ``-`` rules,
+    producing three DEBUG records in total.
+    :param logger: The logger instance that receives the records
+    :type logger: logging.Logger
+    :param section_title: The title of the subsection
+    :type section_title: str
+    """
+    rule = "-" * 60
+    logger.debug(rule)
+    logger.debug(section_title)
+    logger.debug(rule)
+def log_strategy_start(logger, strategy_name, risk_category):
+    """Record, at INFO level, that processing of a strategy has begun.
+    :param logger: The logger instance that receives the record
+    :type logger: logging.Logger
+    :param strategy_name: The name of the strategy
+    :type strategy_name: str
+    :param risk_category: The risk category being processed
+    :type risk_category: str
+    """
+    start_message = f"Starting processing of {strategy_name} strategy for {risk_category} risk category"
+    logger.info(start_message)
+def log_strategy_completion(logger, strategy_name, risk_category, elapsed_time=None):
+    """Log the completion of a strategy processing.
+    Emits one INFO record. When ``elapsed_time`` is supplied, including a
+    measured duration of ``0``, the record ends with the duration formatted
+    to two decimals.
+    :param logger: The logger instance
+    :type logger: logging.Logger
+    :param strategy_name: The name of the strategy
+    :type strategy_name: str
+    :param risk_category: The risk category being processed
+    :type risk_category: str
+    :param elapsed_time: The time taken to process in seconds, or None if not available
+    :type elapsed_time: Optional[float]
+    """
+    message = f"Completed {strategy_name} strategy for {risk_category} risk category"
+    # Compare against None rather than truthiness so a duration of 0 is still reported.
+    if elapsed_time is not None:
+        message = f"{message} in {elapsed_time:.2f}s"
+    logger.info(message)
+def log_error(logger, message, exception=None, context=None):
+    """Log an error with additional context if available.
+    The record text is ``[context] message: exception``, with the bracketed
+    prefix and the exception suffix each present only when supplied.
+    Traceback information is attached when there is one to attach: the passed
+    exception's own traceback, or else the exception currently being handled.
+    :param logger: The logger instance
+    :type logger: logging.Logger
+    :param message: The error message
+    :type message: str
+    :param exception: The exception that was raised, if any
+    :type exception: Exception
+    :param context: Additional context about where the error occurred
+    :type context: str
+    """
+    import sys
+    error_msg = message
+    if context:
+        error_msg = f"[{context}] {error_msg}"
+    if exception is not None:
+        error_msg = f"{error_msg}: {str(exception)}"
+    if isinstance(exception, BaseException):
+        # Passing the instance lets logging use its own traceback, even when
+        # this function is called after the ``except`` block has ended.
+        exc_info = exception
+    else:
+        # exc_info=True with no active exception makes logging append a
+        # meaningless "NoneType: None" line, so only request it when one exists.
+        exc_info = sys.exc_info()[0] is not None
+    logger.error(error_msg, exc_info=exc_info)

azure/ai/evaluation/red_team/_utils/metric_mapping.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""
+Mapping utilities for metrics and risk categories in Red Team Agent.
+"""
+from typing import Union
+from .._attack_objective_generator import _InternalRiskCategory, RiskCategory
+from azure.ai.evaluation._constants import EvaluationMetrics, _InternalEvaluationMetrics
+from azure.ai.evaluation._common.constants import Tasks, _InternalAnnotationTasks
+# Mapping of risk categories to their corresponding evaluation metrics.
+# Keys are RiskCategory members plus the internal _InternalRiskCategory.ECI;
+# values are EvaluationMetrics members (_InternalEvaluationMetrics for ECI).
+# Read by get_metric_from_risk_category(), which falls back to
+# EvaluationMetrics.HATE_FAIRNESS for any category that has no entry here.
+RISK_CATEGORY_METRIC_MAP = {
+    RiskCategory.Violence: EvaluationMetrics.VIOLENCE,
+    RiskCategory.HateUnfairness: EvaluationMetrics.HATE_FAIRNESS,
+    RiskCategory.Sexual: EvaluationMetrics.SEXUAL,
+    RiskCategory.SelfHarm: EvaluationMetrics.SELF_HARM,
+    RiskCategory.ProtectedMaterial: EvaluationMetrics.PROTECTED_MATERIAL,
+    RiskCategory.UngroundedAttributes: EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
+    _InternalRiskCategory.ECI: _InternalEvaluationMetrics.ECI,
+    RiskCategory.CodeVulnerability: EvaluationMetrics.CODE_VULNERABILITY,
+    RiskCategory.SensitiveDataLeakage: EvaluationMetrics.SENSITIVE_DATA_LEAKAGE,
+    RiskCategory.TaskAdherence: EvaluationMetrics.TASK_ADHERENCE,
+    RiskCategory.ProhibitedActions: EvaluationMetrics.PROHIBITED_ACTIONS,
+}
+# Mapping of risk categories to the annotation task used for them.
+# Violence, HateUnfairness, Sexual and SelfHarm all share Tasks.CONTENT_HARM;
+# every other category maps to its own task (_InternalAnnotationTasks for ECI).
+# Read by get_annotation_task_from_risk_category(), which falls back to
+# Tasks.CONTENT_HARM for any category that has no entry here.
+RISK_CATEGORY_ANNOTATION_TASK_MAP = {
+    RiskCategory.Violence: Tasks.CONTENT_HARM,
+    RiskCategory.HateUnfairness: Tasks.CONTENT_HARM,
+    RiskCategory.Sexual: Tasks.CONTENT_HARM,
+    RiskCategory.SelfHarm: Tasks.CONTENT_HARM,
+    RiskCategory.ProtectedMaterial: Tasks.PROTECTED_MATERIAL,
+    RiskCategory.UngroundedAttributes: Tasks.UNGROUNDED_ATTRIBUTES,
+    _InternalRiskCategory.ECI: _InternalAnnotationTasks.ECI,
+    RiskCategory.CodeVulnerability: Tasks.CODE_VULNERABILITY,
+    RiskCategory.SensitiveDataLeakage: Tasks.SENSITIVE_DATA_LEAKAGE,
+    RiskCategory.TaskAdherence: Tasks.TASK_ADHERENCE,
+    RiskCategory.ProhibitedActions: Tasks.PROHIBITED_ACTIONS,
+}
+def get_metric_from_risk_category(risk_category: Union[RiskCategory, _InternalRiskCategory]) -> str:
+    """Get the appropriate evaluation metric for a given risk category.
+    Looks the category up in ``RISK_CATEGORY_METRIC_MAP``. A category with no
+    entry there yields ``EvaluationMetrics.HATE_FAIRNESS`` instead of raising.
+    :param risk_category: The risk category to map to an evaluation metric
+    :type risk_category: Union[RiskCategory, _InternalRiskCategory]
+    :return: The corresponding evaluation metric
+    :rtype: str
+    """
+    # NOTE(review): the fallback silently evaluates unmapped categories as hate/unfairness.
+    return RISK_CATEGORY_METRIC_MAP.get(risk_category, EvaluationMetrics.HATE_FAIRNESS)
+def get_annotation_task_from_risk_category(risk_category: Union[RiskCategory, _InternalRiskCategory]) -> str:
+    """
+    Get the appropriate annotation task for a given risk category.
+    Looks the category up in ``RISK_CATEGORY_ANNOTATION_TASK_MAP``. A category
+    with no entry there yields ``Tasks.CONTENT_HARM`` instead of raising.
+    :param risk_category: The risk category to map to an annotation task
+    :type risk_category: Union[RiskCategory, _InternalRiskCategory]
+    :return: The corresponding annotation task
+    :rtype: str
+    """
+    # NOTE(review): the fallback silently treats unmapped categories as content harm.
+    return RISK_CATEGORY_ANNOTATION_TASK_MAP.get(risk_category, Tasks.CONTENT_HARM)
+def get_attack_objective_from_risk_category(risk_category: Union[RiskCategory]) -> str:
+    """Return the attack objective string used for a risk category.
+    ``RiskCategory.UngroundedAttributes`` is translated to the literal ``"isa"``;
+    every other category is represented by its own ``value``.
+    :param risk_category: The risk category to map to an attack objective
+    :type risk_category: Union[RiskCategory]
+    :return: The corresponding attack objective string
+    :rtype: str
+    """
+    is_ungrounded = risk_category == RiskCategory.UngroundedAttributes
+    return "isa" if is_ungrounded else risk_category.value

azure/ai/evaluation/red_team/_utils/objective_utils.py ADDED Viewed

@@ -0,0 +1,46 @@
+"""
+Utility functions for handling attack objectives in Red Team Agent.
+"""
+import uuid
+from typing import Dict, Optional
+def extract_risk_subtype(objective: Dict) -> Optional[str]:
+    """Extract risk-subtype from an objective's target_harms metadata.
+    Searches through the target_harms list in the objective's metadata to find
+    the first non-empty risk-subtype value. Entries that are not dicts, or whose
+    risk-subtype is missing or empty, are skipped.
+    :param objective: The objective dictionary containing metadata with target_harms
+    :type objective: Dict
+    :return: The risk-subtype value if found, None otherwise (including when
+        metadata is missing or None, or target_harms is not a list)
+    :rtype: Optional[str]
+    """
+    # "metadata" may be absent or explicitly None; treat both as "no metadata"
+    # instead of failing on None.get(...).
+    metadata = objective.get("metadata") or {}
+    target_harms = metadata.get("target_harms")
+    if not isinstance(target_harms, list):
+        return None
+    for harm in target_harms:
+        if not isinstance(harm, dict):
+            continue
+        subtype_value = harm.get("risk-subtype")
+        if subtype_value:
+            return subtype_value
+    return None
+def get_objective_id(objective: Dict) -> str:
+    """Return an identifier string for an objective.
+    When the objective carries an 'id' entry that is not None, its string form
+    is returned. Otherwise a fresh ``generated-<uuid4>`` identifier is produced,
+    which avoids relying on Python's id(), whose memory addresses can be
+    reused after garbage collection.
+    :param objective: The objective dictionary
+    :type objective: Dict
+    :return: A unique identifier for the objective
+    :rtype: str
+    """
+    existing_id = objective.get("id")
+    if existing_id is None:
+        # No usable 'id' field: fall back to a random UUID-based identifier
+        return f"generated-{uuid.uuid4()}"
+    return str(existing_id)

azure/ai/evaluation/red_team/_utils/progress_utils.py ADDED Viewed

@@ -0,0 +1,252 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+"""
+Progress and status management utilities for Red Team Agent.
+This module provides centralized progress tracking, task status management,
+and user feedback utilities for red team operations.
+"""
+import asyncio
+import time
+from datetime import datetime
+from typing import Dict, Optional, Any
+from tqdm import tqdm
+from .constants import TASK_STATUS
+class ProgressManager:
+    """Centralized progress and status tracking for Red Team operations.
+    Keeps the latest status per task, counts completed / failed / timed-out
+    tasks, measures wall-clock time, and optionally drives a ``tqdm`` progress
+    bar. Usable as a context manager: entering calls ``start()`` and exiting
+    calls ``stop()``.
+    """
+    def __init__(
+        self, total_tasks: int = 0, logger=None, show_progress_bar: bool = True, progress_desc: str = "Processing"
+    ):
+        """Initialize progress manager.
+        :param total_tasks: Total number of tasks to track
+        :param logger: Logger instance for progress messages
+        :param show_progress_bar: Whether to show a progress bar
+        :param progress_desc: Description for the progress bar
+        """
+        self.total_tasks = total_tasks
+        self.completed_tasks = 0
+        self.failed_tasks = 0
+        self.timeout_tasks = 0
+        self.logger = logger
+        self.show_progress_bar = show_progress_bar
+        self.progress_desc = progress_desc
+        # Task status tracking: task key -> most recent status
+        self.task_statuses: Dict[str, str] = {}
+        # Timing (time.time() values; None until start()/stop() is called)
+        self.start_time: Optional[float] = None
+        self.end_time: Optional[float] = None
+        # Progress bar (created in start(), cleared in stop())
+        self.progress_bar: Optional[tqdm] = None
+        self.progress_lock = asyncio.Lock()
+    def start(self) -> None:
+        """Start progress tracking.
+        Records the start time and, when the bar is enabled and there is at
+        least one task, creates the progress bar.
+        """
+        self.start_time = time.time()
+        if self.show_progress_bar and self.total_tasks > 0:
+            self.progress_bar = tqdm(
+                total=self.total_tasks,
+                desc=f"{self.progress_desc}: ",
+                ncols=100,
+                unit="task",
+                bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
+            )
+            self.progress_bar.set_postfix({"current": "initializing"})
+    def stop(self) -> None:
+        """Stop progress tracking and cleanup."""
+        self.end_time = time.time()
+        # Test identity rather than truthiness so the check does not depend on tqdm's __bool__.
+        if self.progress_bar is not None:
+            self.progress_bar.close()
+            self.progress_bar = None
+    async def update_task_status(self, task_key: str, status: str, details: Optional[str] = None) -> None:
+        """Update the status of a specific task.
+        When the status actually changes to COMPLETED, FAILED or TIMEOUT, the
+        matching counter is incremented and the progress bar advances by one.
+        :param task_key: Unique identifier for the task
+        :param status: New status for the task
+        :param details: Optional details about the status change; the change is
+            only logged when details are given
+        """
+        old_status = self.task_statuses.get(task_key)
+        self.task_statuses[task_key] = status
+        # Update counters based on status change.
+        # NOTE(review): a task that moves from one terminal status to another
+        # is counted in both counters and advances the bar twice; confirm
+        # whether callers ever re-report a finished task.
+        if old_status != status:
+            reached_terminal_status = True
+            if status == TASK_STATUS["COMPLETED"]:
+                self.completed_tasks += 1
+            elif status == TASK_STATUS["FAILED"]:
+                self.failed_tasks += 1
+            elif status == TASK_STATUS["TIMEOUT"]:
+                self.timeout_tasks += 1
+            else:
+                reached_terminal_status = False
+            if reached_terminal_status:
+                await self._update_progress_bar()
+        # Log status change
+        if self.logger and details:
+            self.logger.debug(f"Task {task_key}: {old_status} -> {status} ({details})")
+    async def _update_progress_bar(self) -> None:
+        """Advance the progress bar by one task and refresh its postfix.
+        Does nothing when no bar is active.
+        """
+        if self.progress_bar is None:
+            return
+        async with self.progress_lock:
+            self.progress_bar.update(1)
+            completion_pct = (self.completed_tasks / self.total_tasks) * 100 if self.total_tasks > 0 else 0
+            postfix = {
+                "completed": f"{completion_pct:.1f}%",
+                "failed": self.failed_tasks,
+                "timeout": self.timeout_tasks,
+            }
+            # An ETA needs at least one completed task to derive an average duration from.
+            if self.start_time is not None and self.completed_tasks > 0:
+                elapsed_time = time.time() - self.start_time
+                avg_time_per_task = elapsed_time / self.completed_tasks
+                remaining_tasks = self.total_tasks - self.completed_tasks - self.failed_tasks - self.timeout_tasks
+                est_remaining_time = avg_time_per_task * remaining_tasks if remaining_tasks > 0 else 0
+                if est_remaining_time > 0:
+                    postfix["eta"] = f"{est_remaining_time/60:.1f}m"
+            # Refresh unconditionally so failure/timeout counts are visible even
+            # before the first task completes.
+            self.progress_bar.set_postfix(postfix)
+    def write_progress_message(self, message: str) -> None:
+        """Write a message that respects the progress bar.
+        Uses ``tqdm.write`` while a bar is active and ``print`` otherwise.
+        :param message: Message to display
+        """
+        if self.progress_bar is not None:
+            tqdm.write(message)
+        else:
+            print(message)
+    def log_task_completion(
+        self, task_name: str, duration: float, success: bool = True, details: Optional[str] = None
+    ) -> None:
+        """Log the completion of a task.
+        The message is displayed and, when a logger is configured, also logged
+        at INFO (success) or WARNING (failure) level.
+        :param task_name: Name of the completed task
+        :param duration: Duration in seconds
+        :param success: Whether the task completed successfully
+        :param details: Optional additional details
+        """
+        status_icon = "✅" if success else "❌"
+        message = f"{status_icon} {task_name} completed in {duration:.1f}s"
+        if details:
+            message += f" - {details}"
+        self.write_progress_message(message)
+        if self.logger:
+            if success:
+                self.logger.info(message)
+            else:
+                self.logger.warning(message)
+    def log_task_timeout(self, task_name: str, timeout_duration: float) -> None:
+        """Log a task timeout.
+        :param task_name: Name of the timed out task
+        :param timeout_duration: Timeout duration in seconds
+        """
+        message = f"⚠️ TIMEOUT: {task_name} after {timeout_duration}s"
+        self.write_progress_message(message)
+        if self.logger:
+            self.logger.warning(message)
+    def log_task_error(self, task_name: str, error: Exception) -> None:
+        """Log a task error.
+        :param task_name: Name of the failed task
+        :param error: The exception that occurred
+        """
+        message = f"❌ ERROR: {task_name} - {error.__class__.__name__}: {str(error)}"
+        self.write_progress_message(message)
+        if self.logger:
+            self.logger.error(message)
+    def get_summary(self) -> Dict[str, Any]:
+        """Get a summary of progress and statistics.
+        Before ``stop()`` has been called, the elapsed time is measured up to now.
+        :return: Dictionary containing progress summary; ``total_time_seconds`` is
+            None if tracking was never started and ``average_time_per_task`` is
+            None when it cannot be computed
+        """
+        total_time = None
+        if self.start_time:
+            end_time = self.end_time or time.time()
+            total_time = end_time - self.start_time
+        return {
+            "total_tasks": self.total_tasks,
+            "completed_tasks": self.completed_tasks,
+            "failed_tasks": self.failed_tasks,
+            "timeout_tasks": self.timeout_tasks,
+            "success_rate": (self.completed_tasks / self.total_tasks) * 100 if self.total_tasks > 0 else 0,
+            "total_time_seconds": total_time,
+            "average_time_per_task": (
+                total_time / self.completed_tasks if total_time and self.completed_tasks > 0 else None
+            ),
+            "task_statuses": self.task_statuses.copy(),
+        }
+    def print_summary(self) -> None:
+        """Print a formatted summary of the progress."""
+        summary = self.get_summary()
+        self.write_progress_message("\n" + "=" * 60)
+        self.write_progress_message("EXECUTION SUMMARY")
+        self.write_progress_message("=" * 60)
+        self.write_progress_message(f"Total Tasks: {summary['total_tasks']}")
+        self.write_progress_message(f"Completed: {summary['completed_tasks']}")
+        self.write_progress_message(f"Failed: {summary['failed_tasks']}")
+        self.write_progress_message(f"Timeouts: {summary['timeout_tasks']}")
+        self.write_progress_message(f"Success Rate: {summary['success_rate']:.1f}%")
+        # None means "not available"; a measured 0.0 is still worth printing.
+        if summary["total_time_seconds"] is not None:
+            self.write_progress_message(f"Total Time: {summary['total_time_seconds']:.1f}s")
+        if summary["average_time_per_task"] is not None:
+            self.write_progress_message(f"Avg Time/Task: {summary['average_time_per_task']:.1f}s")
+        self.write_progress_message("=" * 60)
+    def __enter__(self):
+        """Context manager entry: start tracking and return this manager."""
+        self.start()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit: stop tracking. Exceptions are not suppressed."""
+        self.stop()
+def create_progress_manager(
+    total_tasks: int = 0, logger=None, show_progress_bar: bool = True, progress_desc: str = "Processing"
+) -> ProgressManager:
+    """Factory helper that builds a ProgressManager.
+    Every argument is forwarded unchanged, by keyword, to the constructor.
+    :param total_tasks: Total number of tasks to track
+    :param logger: Logger instance
+    :param show_progress_bar: Whether to show progress bar
+    :param progress_desc: Description for progress bar
+    :return: Configured ProgressManager
+    """
+    manager = ProgressManager(
+        total_tasks=total_tasks,
+        logger=logger,
+        show_progress_bar=show_progress_bar,
+        progress_desc=progress_desc,
+    )
+    return manager

azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl