azure-ai-evaluation 1.9.0 (py3-none-any.whl) → 1.11.0 (py3-none-any.whl)

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic.

Files changed (85)
  1. azure/ai/evaluation/__init__.py +46 -12
  2. azure/ai/evaluation/_aoai/python_grader.py +84 -0
  3. azure/ai/evaluation/_aoai/score_model_grader.py +1 -0
  4. azure/ai/evaluation/_common/onedp/models/_models.py +5 -0
  5. azure/ai/evaluation/_common/rai_service.py +3 -3
  6. azure/ai/evaluation/_common/utils.py +74 -17
  7. azure/ai/evaluation/_converters/_ai_services.py +60 -10
  8. azure/ai/evaluation/_converters/_models.py +75 -26
  9. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +70 -22
  10. azure/ai/evaluation/_evaluate/_eval_run.py +14 -1
  11. azure/ai/evaluation/_evaluate/_evaluate.py +163 -44
  12. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +79 -33
  13. azure/ai/evaluation/_evaluate/_utils.py +5 -2
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  15. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +8 -1
  16. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
  17. azure/ai/evaluation/_evaluators/_common/_base_eval.py +143 -25
  18. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +7 -2
  19. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +19 -9
  20. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -5
  21. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -1
  22. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -1
  23. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +5 -2
  24. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -1
  25. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +3 -0
  26. azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -0
  27. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +1 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
  29. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  30. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +114 -4
  31. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +9 -3
  32. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -1
  33. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +8 -1
  34. azure/ai/evaluation/_evaluators/_qa/_qa.py +1 -1
  35. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +56 -3
  36. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
  37. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +11 -3
  38. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +3 -2
  39. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
  40. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +2 -1
  41. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -2
  42. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +24 -12
  43. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
  44. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +214 -187
  45. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +126 -31
  46. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +8 -1
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -1
  48. azure/ai/evaluation/_exceptions.py +1 -0
  49. azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
  50. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +115 -30
  51. azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
  52. azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
  53. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +28 -31
  54. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +2 -0
  55. azure/ai/evaluation/_version.py +1 -1
  56. azure/ai/evaluation/red_team/__init__.py +4 -3
  57. azure/ai/evaluation/red_team/_attack_objective_generator.py +17 -0
  58. azure/ai/evaluation/red_team/_callback_chat_target.py +14 -1
  59. azure/ai/evaluation/red_team/_evaluation_processor.py +376 -0
  60. azure/ai/evaluation/red_team/_mlflow_integration.py +322 -0
  61. azure/ai/evaluation/red_team/_orchestrator_manager.py +661 -0
  62. azure/ai/evaluation/red_team/_red_team.py +655 -2665
  63. azure/ai/evaluation/red_team/_red_team_result.py +6 -0
  64. azure/ai/evaluation/red_team/_result_processor.py +610 -0
  65. azure/ai/evaluation/red_team/_utils/__init__.py +34 -0
  66. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +11 -4
  67. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +6 -0
  68. azure/ai/evaluation/red_team/_utils/constants.py +0 -2
  69. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  70. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  71. azure/ai/evaluation/red_team/_utils/formatting_utils.py +115 -13
  72. azure/ai/evaluation/red_team/_utils/metric_mapping.py +24 -4
  73. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  74. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  75. azure/ai/evaluation/red_team/_utils/strategy_utils.py +17 -4
  76. azure/ai/evaluation/simulator/_adversarial_simulator.py +14 -2
  77. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +13 -1
  78. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +21 -7
  79. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +24 -5
  80. azure/ai/evaluation/simulator/_simulator.py +12 -0
  81. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/METADATA +63 -4
  82. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/RECORD +85 -76
  83. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/WHEEL +1 -1
  84. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info/licenses}/NOTICE.txt +0 -0
  85. {azure_ai_evaluation-1.9.0.dist-info → azure_ai_evaluation-1.11.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/red_team/_utils/formatting_utils.py

@@ -3,27 +3,29 @@ Utility functions for formatting, conversion, and processing in Red Team Agent.
 """
 
 import json
-import pandas as pd
 import math
-from datetime import datetime
-from typing import Dict, List, Union, Any, Optional, cast
+import itertools
+import os
+import logging
+from typing import Dict, List, Union, Any
+from pathlib import Path
+from pyrit.models import ChatMessage
+from pyrit.memory import CentralMemory
 from .._attack_strategy import AttackStrategy
 from .._red_team_result import RedTeamResult
-from pyrit.models import ChatMessage
 
 
-def message_to_dict(message: ChatMessage) -> Dict[str, str]:
-    """Convert a ChatMessage to dictionary format.
+def message_to_dict(message: ChatMessage, context: str = None) -> Dict[str, str]:
+    """Convert a ChatMessage and context to dictionary format.
 
     :param message: The chat message to convert
     :type message: ChatMessage
+    :param context: Additional context to include in the dictionary
+    :type context: str
     :return: Dictionary representation with role and content
     :rtype: Dict[str, str]
     """
-    return {
-        "role": message.role,
-        "content": message.content,
-    }
+    return {"role": message.role, "content": message.content, "context": context}
 
 
 def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
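
As a quick illustration of the new return shape (a sketch only: the ChatMessage values are invented, the private import path is taken from the file list above, and running it requires the red-team extras since the module imports pyrit):

    from pyrit.models import ChatMessage
    from azure.ai.evaluation.red_team._utils.formatting_utils import message_to_dict

    # Illustrative message; pyrit's ChatMessage carries role and content fields.
    msg = ChatMessage(role="assistant", content="I cannot help with that request.")

    print(message_to_dict(msg))                              # context defaults to None
    print(message_to_dict(msg, context="retrieved passage"))  # context is carried through verbatim
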
@@ -89,7 +91,7 @@ def get_attack_success(result: str) -> bool:
     """
     Based on EVALUATION_PASS_FAIL_MAPPING, returns True denoting attack success if result matches the value of the EVALUATION_PASS_FAIL_MAPPING key False.
     """
-    from .._red_team import EVALUATION_PASS_FAIL_MAPPING
+    from azure.ai.evaluation._constants import EVALUATION_PASS_FAIL_MAPPING
 
     return result == EVALUATION_PASS_FAIL_MAPPING.get(False, "true")
 
@@ -117,7 +119,7 @@ def format_scorecard(redteam_result: RedTeamResult) -> str:
     separator = "-" * 132
     output.append(separator)
     output.append(
-        f"{'Risk Category':<18}| {'Baseline ASR':<14} | {'Easy-Complexity Attacks ASR':<28} | {'Moderate-Complexity Attacks ASR':<30} | {'Difficult-Complexity Attacks ASR':<30}"
+        f"{'Risk Category':<21}| {'Baseline ASR':<14} | {'Easy-Complexity Attacks ASR':<28} | {'Moderate-Complexity Attacks ASR':<30} | {'Difficult-Complexity Attacks ASR':<30}"
    )
     output.append(separator)
 
@@ -134,7 +136,7 @@ def format_scorecard(redteam_result: RedTeamResult) -> str:
         moderate = "N/A" if is_none_or_nan(moderate_val) else f"{moderate_val}%"
         difficult = "N/A" if is_none_or_nan(difficult_val) else f"{difficult_val}%"
 
-        output.append(f"{risk_category:<18}| {baseline:<14} | {easy:<28} | {moderate:<31} | {difficult:<30}")
+        output.append(f"{risk_category:<21}| {baseline:<14} | {easy:<28} | {moderate:<31} | {difficult:<30}")
 
     return "\n".join(output)
 
@@ -160,3 +162,103 @@ def list_mean_nan_safe(data_list: List[Any]) -> float:
     if not filtered_list:
         return 0.0
     return sum(filtered_list) / len(filtered_list)
+
+
+def write_pyrit_outputs_to_file(
+    *,
+    output_path: str,
+    logger: logging.Logger,
+    prompt_to_context: Dict[str, str],
+) -> str:
+    """Write PyRIT outputs to a file with a name based on orchestrator, strategy, and risk category.
+
+    :param output_path: Path to write the output file
+    :type output_path: str
+    :param logger: Logger instance for logging
+    :type logger: logging.Logger
+    :param prompt_to_context: Mapping of prompts to their context
+    :type prompt_to_context: Dict[str, str]
+    :return: Path to the output file
+    :rtype: str
+    :raises IOError: If the output file cannot be read or written
+    :raises PermissionError: If there are insufficient permissions to access the output file
+    :raises Exception: For other unexpected errors during file operations or memory retrieval
+    """
+
+    logger.debug(f"Writing PyRIT outputs to file: {output_path}")
+    memory = CentralMemory.get_memory_instance()
+
+    memory_label = {"risk_strategy_path": output_path}
+
+    prompts_request_pieces = memory.get_prompt_request_pieces(labels=memory_label)
+
+    conversations = [
+        [
+            (item.to_chat_message(), prompt_to_context.get(item.original_value, "") or item.labels.get("context", ""))
+            for item in group
+        ]
+        for conv_id, group in itertools.groupby(prompts_request_pieces, key=lambda x: x.conversation_id)
+    ]
+
+    # Check if we should overwrite existing file with more conversations
+    if os.path.exists(output_path):
+        existing_line_count = 0
+        try:
+            with open(output_path, "r") as existing_file:
+                existing_line_count = sum(1 for _ in existing_file)
+
+            if len(conversations) > existing_line_count:
+                logger.debug(
+                    f"Found more prompts ({len(conversations)}) than existing file lines ({existing_line_count}). Replacing content."
+                )
+                # Convert to json lines
+                json_lines = ""
+                for conversation in conversations:
+                    if conversation[0][0].role == "system":
+                        # Skip system messages in the output
+                        continue
+                    json_lines += (
+                        json.dumps(
+                            {
+                                "conversation": {
+                                    "messages": [message_to_dict(message[0], message[1]) for message in conversation]
+                                }
+                            }
+                        )
+                        + "\n"
+                    )
+                with Path(output_path).open("w") as f:
+                    f.writelines(json_lines)
+                logger.debug(
+                    f"Successfully wrote {len(conversations)-existing_line_count} new conversation(s) to {output_path}"
+                )
+            else:
+                logger.debug(
+                    f"Existing file has {existing_line_count} lines, new data has {len(conversations)} prompts. Keeping existing file."
+                )
+                return output_path
+        except Exception as e:
+            logger.warning(f"Failed to read existing file {output_path}: {str(e)}")
+    else:
+        logger.debug(f"Creating new file: {output_path}")
+        # Convert to json lines
+        json_lines = ""
+
+        for conversation in conversations:
+            if conversation[0][0].role == "system":
+                # Skip system messages in the output
+                continue
+            json_lines += (
+                json.dumps(
+                    {
+                        "conversation": {
+                            "messages": [message_to_dict(message[0], message[1]) for message in conversation]
+                        }
+                    }
+                )
+                + "\n"
+            )
+        with Path(output_path).open("w") as f:
+            f.writelines(json_lines)
+        logger.debug(f"Successfully wrote {len(conversations)} conversations to {output_path}")
+    return str(output_path)
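
Usage sketch for the new helper (hedged: the output path, logger name, and prompt text are hypothetical; the keyword-only signature and the JSON-lines layout come from the function above):

    import json
    import logging

    # from azure.ai.evaluation.red_team._utils.formatting_utils import write_pyrit_outputs_to_file
    #
    # out_file = write_pyrit_outputs_to_file(
    #     output_path="scan_outputs/violence_base64.jsonl",
    #     logger=logging.getLogger("red_team"),
    #     prompt_to_context={"<attack prompt>": "<grounding context>"},
    # )

    # Each non-system conversation becomes one line of the .jsonl file, shaped like this:
    line = {
        "conversation": {
            "messages": [
                {"role": "user", "content": "<attack prompt>", "context": "<grounding context>"},
                {"role": "assistant", "content": "<target response>", "context": ""},
            ]
        }
    }
    print(json.dumps(line))
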
azure/ai/evaluation/red_team/_utils/metric_mapping.py

@@ -2,6 +2,7 @@
 Mapping utilities for metrics and risk categories in Red Team Agent.
 """
 
+from typing import Union
 from .._attack_objective_generator import _InternalRiskCategory, RiskCategory
 from azure.ai.evaluation._constants import EvaluationMetrics, _InternalEvaluationMetrics
 from azure.ai.evaluation._common.constants import Tasks, _InternalAnnotationTasks
@@ -13,6 +14,8 @@ RISK_CATEGORY_METRIC_MAP = {
     RiskCategory.Sexual: EvaluationMetrics.SEXUAL,
     RiskCategory.SelfHarm: EvaluationMetrics.SELF_HARM,
     RiskCategory.ProtectedMaterial: EvaluationMetrics.PROTECTED_MATERIAL,
+    RiskCategory.UngroundedAttributes: EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
+    RiskCategory.IndirectAttack: EvaluationMetrics.XPIA,
     _InternalRiskCategory.ECI: _InternalEvaluationMetrics.ECI,
     RiskCategory.CodeVulnerability: EvaluationMetrics.CODE_VULNERABILITY,
 }
@@ -23,28 +26,45 @@ RISK_CATEGORY_ANNOTATION_TASK_MAP = {
     RiskCategory.Sexual: Tasks.CONTENT_HARM,
     RiskCategory.SelfHarm: Tasks.CONTENT_HARM,
     RiskCategory.ProtectedMaterial: Tasks.PROTECTED_MATERIAL,
+    RiskCategory.UngroundedAttributes: Tasks.UNGROUNDED_ATTRIBUTES,
+    RiskCategory.IndirectAttack: Tasks.XPIA,
     _InternalRiskCategory.ECI: _InternalAnnotationTasks.ECI,
     RiskCategory.CodeVulnerability: Tasks.CODE_VULNERABILITY,
 }
 
 
-def get_metric_from_risk_category(risk_category: RiskCategory) -> str:
+def get_metric_from_risk_category(risk_category: Union[RiskCategory]) -> str:
     """Get the appropriate evaluation metric for a given risk category.
 
     :param risk_category: The risk category to map to an evaluation metric
-    :type risk_category: RiskCategory
+    :type risk_category: Union[RiskCategory]
     :return: The corresponding evaluation metric
     :rtype: str
     """
     return RISK_CATEGORY_METRIC_MAP.get(risk_category, EvaluationMetrics.HATE_FAIRNESS)
 
 
-def get_annotation_task_from_risk_category(risk_category: RiskCategory) -> str:
+def get_annotation_task_from_risk_category(risk_category: Union[RiskCategory]) -> str:
     """
     Get the appropriate annotation task for a given risk category.
     :param risk_category: The risk category to map to an annotation task
-    :type risk_category: RiskCategory
+    :type risk_category: Union[RiskCategory]
     :return: The corresponding annotation task
     :rtype: str
     """
     return RISK_CATEGORY_ANNOTATION_TASK_MAP.get(risk_category, Tasks.CONTENT_HARM)
+
+
+def get_attack_objective_from_risk_category(risk_category: Union[RiskCategory]) -> str:
+    """Get the attack objective string for a given risk category.
+
+    :param risk_category: The risk category to map to an attack objective
+    :type risk_category: Union[RiskCategory]
+    :return: The corresponding attack objective string
+    :rtype: str
+    """
+    return (
+        "isa"
+        if risk_category == RiskCategory.UngroundedAttributes
+        else "xpia" if risk_category == RiskCategory.IndirectAttack else risk_category.value
+    )
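
A small sketch of how the extended mappings behave, based only on the tables and helpers above (RiskCategory is the public enum exported from azure.ai.evaluation.red_team; the metric_mapping import path is the private module shown in the file list):

    from azure.ai.evaluation.red_team import RiskCategory
    from azure.ai.evaluation.red_team._utils.metric_mapping import (
        get_attack_objective_from_risk_category,
        get_metric_from_risk_category,
    )

    # IndirectAttack and UngroundedAttributes now resolve to their own metrics
    # (XPIA and UNGROUNDED_ATTRIBUTES) instead of the hate/unfairness fallback.
    print(get_metric_from_risk_category(RiskCategory.IndirectAttack))
    print(get_metric_from_risk_category(RiskCategory.UngroundedAttributes))

    # The new attack-objective helper special-cases the same two categories.
    assert get_attack_objective_from_risk_category(RiskCategory.IndirectAttack) == "xpia"
    assert get_attack_objective_from_risk_category(RiskCategory.UngroundedAttributes) == "isa"
    assert get_attack_objective_from_risk_category(RiskCategory.Violence) == RiskCategory.Violence.value
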
azure/ai/evaluation/red_team/_utils/progress_utils.py (new file)

@@ -0,0 +1,252 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+"""
+Progress and status management utilities for Red Team Agent.
+
+This module provides centralized progress tracking, task status management,
+and user feedback utilities for red team operations.
+"""
+
+import asyncio
+import time
+from datetime import datetime
+from typing import Dict, Optional, Any
+from tqdm import tqdm
+
+from .constants import TASK_STATUS
+
+
+class ProgressManager:
+    """Centralized progress and status tracking for Red Team operations."""
+
+    def __init__(
+        self, total_tasks: int = 0, logger=None, show_progress_bar: bool = True, progress_desc: str = "Processing"
+    ):
+        """Initialize progress manager.
+
+        :param total_tasks: Total number of tasks to track
+        :param logger: Logger instance for progress messages
+        :param show_progress_bar: Whether to show a progress bar
+        :param progress_desc: Description for the progress bar
+        """
+        self.total_tasks = total_tasks
+        self.completed_tasks = 0
+        self.failed_tasks = 0
+        self.timeout_tasks = 0
+        self.logger = logger
+        self.show_progress_bar = show_progress_bar
+        self.progress_desc = progress_desc
+
+        # Task status tracking
+        self.task_statuses: Dict[str, str] = {}
+
+        # Timing
+        self.start_time: Optional[float] = None
+        self.end_time: Optional[float] = None
+
+        # Progress bar
+        self.progress_bar: Optional[tqdm] = None
+        self.progress_lock = asyncio.Lock()
+
+    def start(self) -> None:
+        """Start progress tracking."""
+        self.start_time = time.time()
+
+        if self.show_progress_bar and self.total_tasks > 0:
+            self.progress_bar = tqdm(
+                total=self.total_tasks,
+                desc=f"{self.progress_desc}: ",
+                ncols=100,
+                unit="task",
+                bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
+            )
+            self.progress_bar.set_postfix({"current": "initializing"})
+
+    def stop(self) -> None:
+        """Stop progress tracking and cleanup."""
+        self.end_time = time.time()
+
+        if self.progress_bar:
+            self.progress_bar.close()
+            self.progress_bar = None
+
+    async def update_task_status(self, task_key: str, status: str, details: Optional[str] = None) -> None:
+        """Update the status of a specific task.
+
+        :param task_key: Unique identifier for the task
+        :param status: New status for the task
+        :param details: Optional details about the status change
+        """
+        old_status = self.task_statuses.get(task_key)
+        self.task_statuses[task_key] = status
+
+        # Update counters based on status change
+        if old_status != status:
+            if status == TASK_STATUS["COMPLETED"]:
+                self.completed_tasks += 1
+                await self._update_progress_bar()
+            elif status == TASK_STATUS["FAILED"]:
+                self.failed_tasks += 1
+                await self._update_progress_bar()
+            elif status == TASK_STATUS["TIMEOUT"]:
+                self.timeout_tasks += 1
+                await self._update_progress_bar()
+
+        # Log status change
+        if self.logger and details:
+            self.logger.debug(f"Task {task_key}: {old_status} -> {status} ({details})")
+
+    async def _update_progress_bar(self) -> None:
+        """Update the progress bar display."""
+        if not self.progress_bar:
+            return
+
+        async with self.progress_lock:
+            self.progress_bar.update(1)
+
+            completion_pct = (self.completed_tasks / self.total_tasks) * 100 if self.total_tasks > 0 else 0
+
+            # Calculate time estimates
+            if self.start_time:
+                elapsed_time = time.time() - self.start_time
+                if self.completed_tasks > 0:
+                    avg_time_per_task = elapsed_time / self.completed_tasks
+                    remaining_tasks = self.total_tasks - self.completed_tasks - self.failed_tasks - self.timeout_tasks
+                    est_remaining_time = avg_time_per_task * remaining_tasks if remaining_tasks > 0 else 0
+
+                    postfix = {
+                        "completed": f"{completion_pct:.1f}%",
+                        "failed": self.failed_tasks,
+                        "timeout": self.timeout_tasks,
+                    }
+
+                    if est_remaining_time > 0:
+                        postfix["eta"] = f"{est_remaining_time/60:.1f}m"
+
+                    self.progress_bar.set_postfix(postfix)
+
+    def write_progress_message(self, message: str) -> None:
+        """Write a message that respects the progress bar.
+
+        :param message: Message to display
+        """
+        if self.progress_bar:
+            tqdm.write(message)
+        else:
+            print(message)
+
+    def log_task_completion(
+        self, task_name: str, duration: float, success: bool = True, details: Optional[str] = None
+    ) -> None:
+        """Log the completion of a task.
+
+        :param task_name: Name of the completed task
+        :param duration: Duration in seconds
+        :param success: Whether the task completed successfully
+        :param details: Optional additional details
+        """
+        status_icon = "✅" if success else "❌"
+        message = f"{status_icon} {task_name} completed in {duration:.1f}s"
+
+        if details:
+            message += f" - {details}"
+
+        self.write_progress_message(message)
+
+        if self.logger:
+            log_level = "info" if success else "warning"
+            getattr(self.logger, log_level)(message)
+
+    def log_task_timeout(self, task_name: str, timeout_duration: float) -> None:
+        """Log a task timeout.
+
+        :param task_name: Name of the timed out task
+        :param timeout_duration: Timeout duration in seconds
+        """
+        message = f"⚠️ TIMEOUT: {task_name} after {timeout_duration}s"
+        self.write_progress_message(message)
+
+        if self.logger:
+            self.logger.warning(message)
+
+    def log_task_error(self, task_name: str, error: Exception) -> None:
+        """Log a task error.
+
+        :param task_name: Name of the failed task
+        :param error: The exception that occurred
+        """
+        message = f"❌ ERROR: {task_name} - {error.__class__.__name__}: {str(error)}"
+        self.write_progress_message(message)
+
+        if self.logger:
+            self.logger.error(message)
+
+    def get_summary(self) -> Dict[str, Any]:
+        """Get a summary of progress and statistics.
+
+        :return: Dictionary containing progress summary
+        """
+        total_time = None
+        if self.start_time:
+            end_time = self.end_time or time.time()
+            total_time = end_time - self.start_time
+
+        return {
+            "total_tasks": self.total_tasks,
+            "completed_tasks": self.completed_tasks,
+            "failed_tasks": self.failed_tasks,
+            "timeout_tasks": self.timeout_tasks,
+            "success_rate": (self.completed_tasks / self.total_tasks) * 100 if self.total_tasks > 0 else 0,
+            "total_time_seconds": total_time,
+            "average_time_per_task": (
+                total_time / self.completed_tasks if total_time and self.completed_tasks > 0 else None
+            ),
+            "task_statuses": self.task_statuses.copy(),
+        }
+
+    def print_summary(self) -> None:
+        """Print a formatted summary of the progress."""
+        summary = self.get_summary()
+
+        self.write_progress_message("\n" + "=" * 60)
+        self.write_progress_message("EXECUTION SUMMARY")
+        self.write_progress_message("=" * 60)
+        self.write_progress_message(f"Total Tasks: {summary['total_tasks']}")
+        self.write_progress_message(f"Completed: {summary['completed_tasks']}")
+        self.write_progress_message(f"Failed: {summary['failed_tasks']}")
+        self.write_progress_message(f"Timeouts: {summary['timeout_tasks']}")
+        self.write_progress_message(f"Success Rate: {summary['success_rate']:.1f}%")
+
+        if summary["total_time_seconds"]:
+            self.write_progress_message(f"Total Time: {summary['total_time_seconds']:.1f}s")
+
+        if summary["average_time_per_task"]:
+            self.write_progress_message(f"Avg Time/Task: {summary['average_time_per_task']:.1f}s")
+
+        self.write_progress_message("=" * 60)
+
+    def __enter__(self):
+        """Context manager entry."""
+        self.start()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit."""
+        self.stop()
+
+
+def create_progress_manager(
+    total_tasks: int = 0, logger=None, show_progress_bar: bool = True, progress_desc: str = "Processing"
+) -> ProgressManager:
+    """Create a ProgressManager instance.
+
+    :param total_tasks: Total number of tasks to track
+    :param logger: Logger instance
+    :param show_progress_bar: Whether to show progress bar
+    :param progress_desc: Description for progress bar
+    :return: Configured ProgressManager
+    """
+    return ProgressManager(
+        total_tasks=total_tasks, logger=logger, show_progress_bar=show_progress_bar, progress_desc=progress_desc
+    )
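
The new ProgressManager doubles as a context manager (see __enter__/__exit__ above). A minimal usage sketch, assuming TASK_STATUS in red_team/_utils/constants.py provides the "COMPLETED" key the counters read; the task keys and logger name are invented:

    import asyncio
    import logging

    from azure.ai.evaluation.red_team._utils.constants import TASK_STATUS
    from azure.ai.evaluation.red_team._utils.progress_utils import ProgressManager

    async def run_demo() -> None:
        tasks = ["violence_base64", "self_harm_flip"]  # hypothetical strategy/risk-category task keys

        with ProgressManager(total_tasks=len(tasks), logger=logging.getLogger("red_team")) as progress:
            for task_key in tasks:
                # ... run the scan for this task, then report its outcome ...
                await progress.update_task_status(task_key, TASK_STATUS["COMPLETED"], details="scan finished")
            progress.print_summary()

    asyncio.run(run_demo())

create_progress_manager is a thin factory over the same constructor.
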