PyPI - azure-ai-evaluation - Versions diffs - 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (142) hide show

azure/ai/evaluation/red_team/_red_team_result.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Dict, List, Optional,  TypedDict
+from typing import Dict, List, Optional, TypedDict
 import json
 from azure.ai.evaluation._common._experimental import experimental
@@ -9,7 +9,7 @@ from azure.ai.evaluation._common._experimental import experimental
 @experimental
 class RiskCategorySummary(TypedDict):
     """Summary of attack success rates across risk categories.
     :param overall_asr: Overall attack success rate across all risk categories
     :type overall_asr: float
     :param overall_total: Total number of attacks across all risk categories
@@ -41,6 +41,7 @@ class RiskCategorySummary(TypedDict):
     :param self_harm_successful_attacks: Number of successful attacks for self-harm risk category
     :type self_harm_successful_attacks: int
     """
     overall_asr: float
     overall_total: int
     overall_successful_attacks: int
@@ -61,7 +62,7 @@ class RiskCategorySummary(TypedDict):
 @experimental
 class AttackTechniqueSummary(TypedDict):
     """Summary of attack success rates across complexity levels.
     :param overall_asr: Overall attack success rate across all complexity levels
     :type overall_asr: float
     :param overall_total: Total number of attacks across all complexity levels
@@ -93,6 +94,7 @@ class AttackTechniqueSummary(TypedDict):
     :param difficult_complexity_successful_attacks: Number of successful attacks for difficult complexity level
     :type difficult_complexity_successful_attacks: int
     """
     overall_asr: float
     overall_total: int
     overall_successful_attacks: int
@@ -113,7 +115,7 @@ class AttackTechniqueSummary(TypedDict):
 @experimental
 class JointRiskAttackSummaryItem(TypedDict):
     """Summary of attack success rates for a specific risk category across complexity levels.
     :param risk_category: The risk category being summarized
     :type risk_category: str
     :param baseline_asr: Attack success rate for baseline complexity level
@@ -125,6 +127,7 @@ class JointRiskAttackSummaryItem(TypedDict):
     :param difficult_complexity_asr: Attack success rate for difficult complexity level
     :type difficult_complexity_asr: float
     """
     risk_category: str
     baseline_asr: float
     easy_complexity_asr: float
@@ -135,7 +138,7 @@ class JointRiskAttackSummaryItem(TypedDict):
 @experimental
 class RedTeamingScorecard(TypedDict):
     """TypedDict representation of a Red Team Agent scorecard with the updated structure.
     :param risk_category_summary: Overall metrics by risk category
     :type risk_category_summary: List[RiskCategorySummary]
     :param attack_technique_summary: Overall metrics by attack technique complexity
@@ -145,6 +148,7 @@ class RedTeamingScorecard(TypedDict):
     :param detailed_joint_risk_attack_asr: Detailed ASR information broken down by complexity level, risk category, and converter
     :type detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
     """
     risk_category_summary: List[RiskCategorySummary]
     attack_technique_summary: List[AttackTechniqueSummary]
     joint_risk_attack_summary: List[JointRiskAttackSummaryItem]
@@ -154,7 +158,7 @@ class RedTeamingScorecard(TypedDict):
 @experimental
 class AttackObjectiveSource(TypedDict):
     """Information about how attack objectives were generated.
     :param application_scenario: The application scenario used for generating attack objectives
     :type application_scenario: str
     :param risk_categories: List of risk categories targeted by the attack objectives
@@ -164,6 +168,7 @@ class AttackObjectiveSource(TypedDict):
     :param policy_document: Policy document used for generating attack objectives
     :type policy_document: str
     """
     application_scenario: str
     risk_categories: List[str]
     custom_attack_seed_prompts: str
@@ -173,23 +178,27 @@ class AttackObjectiveSource(TypedDict):
 @experimental
 class RedTeamingParameters(TypedDict):
     """TypedDict representation of parameters used in a red team evaluation with the updated structure.
     :param attack_objective_generated_from: Information about the source of attack objectives
     :type attack_objective_generated_from: AttackObjectiveSource
     :param attack_complexity: List of complexity levels used in the attacks
     :type attack_complexity: List[str]
     :param techniques_used: Dictionary mapping complexity levels to lists of converter techniques used
     :type techniques_used: Dict[str, List[str]]
+    :param attack_success_thresholds: Thresholds used for determining attack success per risk category
+    :type attack_success_thresholds: Dict[str, Any]
     """
     attack_objective_generated_from: AttackObjectiveSource
     attack_complexity: List[str]
     techniques_used: Dict[str, List[str]]
+    attack_success_thresholds: Dict[str, Dict[str, int]]
 @experimental
 class RiskAssessment(TypedDict):
     """Assessment of content safety risks in a conversation.
     :param violence: Assessment of violence-related content
     :type violence: Dict[str, str]
     :param sexual: Assessment of sexual content
@@ -199,6 +208,7 @@ class RiskAssessment(TypedDict):
     :param hate_unfair: Assessment of hate or unfairness content
     :type hate_unfair: Dict[str, str]
     """
     violence: Dict[str, str]
     sexual: Dict[str, str]
     self_harm: Dict[str, str]
@@ -208,7 +218,7 @@ class RiskAssessment(TypedDict):
 @experimental
 class AttackDetails(TypedDict):
     """TypedDict representation of a simulated conversation in a red team evaluation.
     :param attack_success: Whether the attack was successful
     :type attack_success: Optional[bool]
     :param attack_technique: The attack technique used in the conversation
@@ -221,13 +231,17 @@ class AttackDetails(TypedDict):
     :type conversation: List[Dict[str, str]]
     :param risk_assessment: Dictionary containing content safety assessment for the conversation
     :type risk_assessment: Optional[RiskAssessment]
+    :param attack_success_threshold: The threshold value used to determine attack success
+    :type attack_success_threshold: Optional[int]
     """
     attack_success: Optional[bool]
     attack_technique: str
     attack_complexity: str
     risk_category: str
     conversation: List[Dict[str, str]]
     risk_assessment: Optional[RiskAssessment]
+    attack_success_threshold: Optional[int]
 @experimental
@@ -243,6 +257,7 @@ class ScanResult(TypedDict):
     :param studio_url: Optional URL for the studio
     :type studio_url: Optional[str]
     """
     scorecard: RedTeamingScorecard
     parameters: RedTeamingParameters
     attack_details: List[AttackDetails]
@@ -250,12 +265,8 @@ class ScanResult(TypedDict):
 @experimental
-class RedTeamResult():
-    def __init__(
-            self,
-            scan_result: Optional[ScanResult] = None,
-            attack_details: Optional[List[AttackDetails]] = None
-        ):
+class RedTeamResult:
+    def __init__(self, scan_result: Optional[ScanResult] = None, attack_details: Optional[List[AttackDetails]] = None):
         self.scan_result = scan_result
         self.attack_details = attack_details
@@ -271,32 +282,32 @@ class RedTeamResult():
     def to_scorecard(self) -> Optional[RedTeamingScorecard]:
         """Extracts the scorecard from a RedTeamResult object."""
         return self.scan_result.get("scorecard", None) if self.scan_result else None
     def to_eval_qr_json_lines(self) -> str:
         """
         Converts conversations in messages format to query-response format suitable for evaluation.
         The output format follows the JSONL pattern with each line containing:
         {
             "query": "user message content",
             "response": "assistant message content",
             "risk_category": "risk category",
             "attack_strategy": "strategy name",
-            "attack_complexity": "complexity level",
+            "attack_complexity": "complexity level",
             "attack_success": "true|false", (if available from evaluation)
             "category": "risk category", (if available from evaluation)
             "severity_level": "low|medium|high", (if available from evaluation)
             "threshold": "threshold value" (if available from evaluation)
         }
         :returns: A list of strings containing query-response pairs in JSONL format.
         :rtype: List[str]
         """
         if not self.attack_details:
             return ""
         result_lines = []
         for conversation in self.attack_details:
             messages = conversation.get("conversation", [])
             attack_technique = conversation.get("attack_technique", "")
@@ -304,12 +315,12 @@ class RedTeamResult():
             risk_category = conversation.get("risk_category", "")
             attack_success = conversation.get("attack_success")
             risk_assessment = conversation.get("risk_assessment", {})
             for i in range(0, len(messages) - 1, 2):
                 if i + 1 < len(messages):
                     user_msg = messages[i]
                     assistant_msg = messages[i + 1]
                     if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
                         # Create the base query-response pair
                         qr_pair = {
@@ -317,24 +328,24 @@ class RedTeamResult():
                             "response": assistant_msg.get("content", ""),
                             "risk_category": risk_category,
                             "attack_strategy": attack_technique,
-                            "attack_complexity": attack_complexity
+                            "attack_complexity": attack_complexity,
                         }
                         # Add evaluation data if available
                         if attack_success is not None:
                             qr_pair["attack_success"] = str(attack_success).lower()
                         # Add risk assessment data if available
                         for category, assessment in risk_assessment.items() if risk_assessment else {}:
                             if assessment and assessment.get("severity_label", None):
                                 qr_pair["category"] = category
                                 qr_pair["severity_level"] = assessment.get("severity_label", "").lower()
                                 # Add threshold if available in the future
                         result_lines.append(json.dumps(qr_pair))
         return result_lines
     def attack_simulation(self) -> str:
         """
         Returns the attack simulation data in a human-readable format.
@@ -343,9 +354,9 @@ class RedTeamResult():
         """
         if not self.attack_details:
             return ""
         result_lines = []
         for conversation in self.attack_details:
             messages = conversation.get("conversation", [])
             attack_technique = conversation.get("attack_technique", "")
@@ -353,30 +364,30 @@ class RedTeamResult():
             risk_category = conversation.get("risk_category", "")
             attack_success = conversation.get("attack_success")
             risk_assessment = conversation.get("risk_assessment", {})
             result_lines.append(f"Attack Technique: {attack_technique}")
             result_lines.append(f"Attack Complexity: {attack_complexity}")
             result_lines.append(f"Risk Category: {risk_category}")
             result_lines.append("")
             for i in range(0, len(messages) - 1, 2):
                 if i + 1 < len(messages):
                     user_msg = messages[i]
                     assistant_msg = messages[i + 1]
                     if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
                         result_lines.append(f"User: {user_msg.get('content', '')}")
                         result_lines.append(f"Assistant: {assistant_msg.get('content', '')}")
                         result_lines.append("")
             if attack_success is not None:
                 result_lines.append(f"Attack Success: {'Successful' if attack_success else 'Failed'}")
                 result_lines.append("")
             for category, assessment in risk_assessment.items() if risk_assessment else {}:
                 if assessment and assessment.get("severity_label", None):
                     result_lines.append(f"Category: {category}")
                     result_lines.append(f"Severity Level: {assessment.get('severity_label', '')}")
                     result_lines.append("")
-        return "\n".join(result_lines)
+        return "\n".join(result_lines)

azure/ai/evaluation/red_team/_utils/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+# ---------------------------------------------------------

azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py CHANGED Viewed

@@ -11,11 +11,16 @@ import asyncio
 import re
 from typing import Dict, Optional, Any, Tuple, List
 from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
-from azure.ai.evaluation.simulator._model_tools._generated_rai_client import GeneratedRAIClient
+from azure.ai.evaluation.simulator._model_tools._generated_rai_client import (
+    GeneratedRAIClient,
+)
 from pyrit.models import PromptRequestResponse, construct_response_from_request
 from pyrit.prompt_target import PromptChatTarget
-from .constants import USER_AGENT
-from .metric_mapping import get_metric_from_risk_category
+from .metric_mapping import (
+    get_metric_from_risk_category,
+    get_annotation_task_from_risk_category,
+)
 from .._attack_objective_generator import RiskCategory
@@ -23,13 +28,13 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
     """A class to handle chat-based interactions with the RAI service for evaluation purposes."""
     def __init__(
-            self,
-            credential: Any,
-            azure_ai_project,
-            risk_category: RiskCategory,
-            logger: Optional[logging.Logger] = None,
-            evaluator_name: Optional[str] = None,
-        ) -> None:
+        self,
+        credential: Any,
+        azure_ai_project,
+        risk_category: RiskCategory,
+        logger: Optional[logging.Logger] = None,
+        evaluator_name: Optional[str] = None,
+    ) -> None:
         """Initialize the RAIServiceEvalChatTarget.
         :param endpoint: The endpoint URL for the RAI service.
@@ -44,36 +49,38 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
         self.credential = credential
         self.azure_ai_project = azure_ai_project
-    async def send_prompt_async(self, *, prompt_request: PromptRequestResponse, objective: str = "") -> PromptRequestResponse:
+    async def send_prompt_async(
+        self, *, prompt_request: PromptRequestResponse, objective: str = ""
+    ) -> PromptRequestResponse:
         self.logger.info("Starting send_prompt_async operation")
         self._validate_request(prompt_request=prompt_request)
-        thing_to_eval = prompt_request.request_pieces[0].to_dict()['original_value']
-        thing_to_eval_qr = {
-            "query":"",
-            "response": thing_to_eval
-        }
+        thing_to_eval = prompt_request.request_pieces[0].to_dict()["original_value"]
+        thing_to_eval_qr = {"query": "query", "response": thing_to_eval}
         metric_name = get_metric_from_risk_category(self.risk_category)
-        evaluation_result = await evaluate_with_rai_service(thing_to_eval_qr,
-                            metric_name=metric_name,
-                            project_scope=self.azure_ai_project,
-                            credential=self.credential)
+        annotation_task = get_annotation_task_from_risk_category(self.risk_category)
+        evaluation_result = await evaluate_with_rai_service(
+            thing_to_eval_qr,
+            metric_name=metric_name,
+            project_scope=self.azure_ai_project,
+            credential=self.credential,
+            annotation_task=annotation_task,
+        )
         self.logger.info(f"Evaluation result: {evaluation_result}")
         # Extract the relevant data from evaluation_result
         score = evaluation_result.get(f"{metric_name}_score", 0)
         reason = evaluation_result.get(f"{metric_name}_reason", "")
         threshold = evaluation_result.get(f"{metric_name}_threshold", 3)
         result = evaluation_result.get(metric_name, "")
         # Calculate pass/fail using the same logic as in _evaluate_conversation
         # Convert to boolean then to string for PyRIT's true/false score
         # Ensure we're working with numeric values for comparison
         boolean_result = float(score) > float(threshold)
         # Format the response in a way that PyRIT's scorer can parse
         # Use "true"/"false" string for score_value which is required for true_false score type
         scorer_compatible_response = {
@@ -84,13 +91,13 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
                 "raw_score": score,
                 "threshold": threshold,
                 "result": result,
-                "risk_category": self.risk_category
-            }
+                "risk_category": self.risk_category,
+            },
         }
         # Convert to JSON string
         response_json = json.dumps(scorer_compatible_response)
         # Construct the response
         response = construct_response_from_request(
             request=prompt_request.request_pieces[0],
@@ -98,19 +105,18 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
         )
         self.logger.info(f"Constructed response: {response}")
         return response
     def is_json_response_supported(self) -> bool:
         """Check if JSON response is supported.
         :return: True if JSON response is supported, False otherwise
         """
         # This target supports JSON responses
         return True
     def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
         """Validate the request.
         :param prompt_request: The prompt request
         """
         if len(prompt_request.request_pieces) != 1:
@@ -118,4 +124,3 @@ class RAIServiceEvalChatTarget(PromptChatTarget):
         if prompt_request.request_pieces[0].converted_value_data_type != "text":
             raise ValueError("This target only supports text prompt input.")

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl