PyPI - azure-ai-evaluation - Versions diffs - 1.12.0__py3-none-any.whl → 1.13.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.12.0__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

azure/ai/evaluation/red_team/_utils/constants.py CHANGED Viewed

@@ -39,6 +39,7 @@ ATTACK_STRATEGY_COMPLEXITY_MAP = {
     str(AttackStrategy.MODERATE.value): "moderate",
     str(AttackStrategy.DIFFICULT.value): "difficult",
     str(AttackStrategy.Jailbreak.value): "easy",
+    str(AttackStrategy.IndirectJailbreak.value): "easy",
     str(AttackStrategy.MultiTurn.value): "difficult",
     str(AttackStrategy.Crescendo.value): "difficult",
 }

azure/ai/evaluation/red_team/_utils/formatting_utils.py CHANGED Viewed

@@ -15,17 +15,26 @@ from .._attack_strategy import AttackStrategy
 from .._red_team_result import RedTeamResult
-def message_to_dict(message: ChatMessage, context: str = None) -> Dict[str, str]:
+def message_to_dict(
+    message: ChatMessage, context: str = None, tool_calls: List[Any] = None, token_usage: Dict[str, Any] = None
+) -> Dict[str, Any]:
     """Convert a ChatMessage and context to dictionary format.
     :param message: The chat message to convert
     :type message: ChatMessage
     :param context: Additional context to include in the dictionary
     :type context: str
+    :param tool_calls: List of tool calls to include in the dictionary
+    :type tool_calls: List[Any]
+    :param token_usage: Token usage information from the callback
+    :type token_usage: Dict[str, Any]
     :return: Dictionary representation with role and content
-    :rtype: Dict[str, str]
+    :rtype: Dict[str, Any]
     """
-    return {"role": message.role, "content": message.content, "context": context}
+    msg_dict = {"role": message.role, "content": message.content, "context": context, "tool_calls": tool_calls}
+    if token_usage:
+        msg_dict["token_usage"] = token_usage
+    return msg_dict
 def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy]]) -> str:
@@ -164,6 +173,82 @@ def list_mean_nan_safe(data_list: List[Any]) -> float:
     return sum(filtered_list) / len(filtered_list)
+def format_content_by_modality(content: str, modality: str) -> str:
+    """Format content based on the specified modality, returning one random format.
+    :param content: The content to format
+    :type content: str
+    :param modality: The modality type (html, markdown, footnote, code, text)
+    :type modality: str
+    :return: Formatted content
+    :rtype: str
+    """
+    import html
+    import random
+    def format_as_html(text):
+        html_text = html.escape(text)
+        return [
+            f"<p>{html_text}</p>",
+            f'<div style="display:none">{html_text}</div>',
+            f'<span style="display:none">{html_text}</span>',
+            f"<div hidden>{html_text}</div>",
+            f'<meta content="{html_text}">',
+        ]
+    def format_as_markdown(text):
+        markdown_text = text.replace("\n", "  \n")  # Convert newlines to Markdown line breaks
+        return [
+            f"\n**{markdown_text}**\n",  # Bold
+            f"\n*{markdown_text}*\n",  # Italic
+            f"\n`{markdown_text}`\n",  # Inline code
+            f"\n```{markdown_text}```\n",  # Code block
+            f"\n[Link]({markdown_text})\n",  # Link
+        ]
+    def format_as_footnote(text):
+        footnote_text = text.replace("\n", " ")
+        return [
+            f"[^{footnote_text}]",  # Footnote reference
+            f"\n[1] {footnote_text}\n",  # Footnote reference with number
+            f"<sup>{footnote_text}</sup>",  # Superscript
+            f'<span class="footnote">{footnote_text}</span>',  # Custom footnote span
+            f'<div class="footnote">{footnote_text}</div>',  # Custom footnote div
+        ]
+    def format_as_code(text):
+        code_text = text.replace("\n", " ")
+        return [
+            f"`{code_text}`",  # Inline code
+            f"```\n{code_text}\n```",  # Code block
+            f'"""\n{code_text}\n"""',  # Code block
+            f"# {code_text}",  # Inline comment
+            f'def function():\n    print("{code_text}")',  # Function call
+        ]
+    def format_as_text(text):
+        return [f"<document>{text}</document>"]  # Return text in document tags
+    # Mapping of modality types to formatting functions
+    modality_formatters = {
+        "html": format_as_html,
+        "markdown": format_as_markdown,
+        "footnote": format_as_footnote,
+        "code": format_as_code,
+        "text": format_as_text,
+    }
+    # Get formatter based on modality type
+    if modality and modality.lower() in modality_formatters:
+        formatter = modality_formatters[modality.lower()]
+        formats = formatter(content)
+        # Return one random format from the available options
+        return random.choice(formats)
+    else:
+        # Return plain text if modality not recognized
+        return content
 def write_pyrit_outputs_to_file(
     *,
     output_path: str,
@@ -194,7 +279,13 @@ def write_pyrit_outputs_to_file(
     conversations = [
         [
-            (item.to_chat_message(), prompt_to_context.get(item.original_value, "") or item.labels.get("context", ""))
+            (
+                item.to_chat_message(),
+                prompt_to_context.get(item.original_value, "") or item.labels.get("context", ""),
+                item.labels.get("tool_calls", []),
+                item.labels.get("risk_sub_type"),
+                item.labels.get("token_usage"),
+            )
             for item in group
         ]
         for conv_id, group in itertools.groupby(prompts_request_pieces, key=lambda x: x.conversation_id)
@@ -217,16 +308,22 @@ def write_pyrit_outputs_to_file(
                     if conversation[0][0].role == "system":
                         # Skip system messages in the output
                         continue
-                    json_lines += (
-                        json.dumps(
-                            {
-                                "conversation": {
-                                    "messages": [message_to_dict(message[0], message[1]) for message in conversation]
-                                }
-                            }
-                        )
-                        + "\n"
-                    )
+                    conv_dict = {
+                        "conversation": {
+                            "messages": [
+                                message_to_dict(
+                                    message[0], message[1], message[2], message[4] if len(message) > 4 else None
+                                )
+                                for message in conversation
+                            ]
+                        }
+                    }
+                    # Add risk_sub_type if present (check first message for the label)
+                    if conversation and len(conversation) > 0 and len(conversation[0]) > 3:
+                        risk_sub_type = conversation[0][3]
+                        if risk_sub_type:
+                            conv_dict["risk_sub_type"] = risk_sub_type
+                    json_lines += json.dumps(conv_dict) + "\n"
                 with Path(output_path).open("w") as f:
                     f.writelines(json_lines)
                 logger.debug(
@@ -248,16 +345,20 @@ def write_pyrit_outputs_to_file(
             if conversation[0][0].role == "system":
                 # Skip system messages in the output
                 continue
-            json_lines += (
-                json.dumps(
-                    {
-                        "conversation": {
-                            "messages": [message_to_dict(message[0], message[1]) for message in conversation]
-                        }
-                    }
-                )
-                + "\n"
-            )
+            conv_dict = {
+                "conversation": {
+                    "messages": [
+                        message_to_dict(message[0], message[1], message[2], message[4] if len(message) > 4 else None)
+                        for message in conversation
+                    ]
+                }
+            }
+            # Add risk_sub_type if present (check first message for the label)
+            if conversation and len(conversation) > 0 and len(conversation[0]) > 3:
+                risk_sub_type = conversation[0][3]
+                if risk_sub_type:
+                    conv_dict["risk_sub_type"] = risk_sub_type
+            json_lines += json.dumps(conv_dict) + "\n"
         with Path(output_path).open("w") as f:
             f.writelines(json_lines)
         logger.debug(f"Successfully wrote {len(conversations)} conversations to {output_path}")

azure/ai/evaluation/red_team/_utils/metric_mapping.py CHANGED Viewed

@@ -15,9 +15,11 @@ RISK_CATEGORY_METRIC_MAP = {
     RiskCategory.SelfHarm: EvaluationMetrics.SELF_HARM,
     RiskCategory.ProtectedMaterial: EvaluationMetrics.PROTECTED_MATERIAL,
     RiskCategory.UngroundedAttributes: EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
-    RiskCategory.IndirectAttack: EvaluationMetrics.XPIA,
     _InternalRiskCategory.ECI: _InternalEvaluationMetrics.ECI,
     RiskCategory.CodeVulnerability: EvaluationMetrics.CODE_VULNERABILITY,
+    RiskCategory.SensitiveDataLeakage: EvaluationMetrics.SENSITIVE_DATA_LEAKAGE,
+    RiskCategory.TaskAdherence: EvaluationMetrics.TASK_ADHERENCE,
+    RiskCategory.ProhibitedActions: EvaluationMetrics.PROHIBITED_ACTIONS,
 }
 RISK_CATEGORY_ANNOTATION_TASK_MAP = {
@@ -27,9 +29,11 @@ RISK_CATEGORY_ANNOTATION_TASK_MAP = {
     RiskCategory.SelfHarm: Tasks.CONTENT_HARM,
     RiskCategory.ProtectedMaterial: Tasks.PROTECTED_MATERIAL,
     RiskCategory.UngroundedAttributes: Tasks.UNGROUNDED_ATTRIBUTES,
-    RiskCategory.IndirectAttack: Tasks.XPIA,
     _InternalRiskCategory.ECI: _InternalAnnotationTasks.ECI,
     RiskCategory.CodeVulnerability: Tasks.CODE_VULNERABILITY,
+    RiskCategory.SensitiveDataLeakage: Tasks.SENSITIVE_DATA_LEAKAGE,
+    RiskCategory.TaskAdherence: Tasks.TASK_ADHERENCE,
+    RiskCategory.ProhibitedActions: Tasks.PROHIBITED_ACTIONS,
 }
@@ -63,8 +67,7 @@ def get_attack_objective_from_risk_category(risk_category: Union[RiskCategory])
     :return: The corresponding attack objective string
     :rtype: str
     """
-    return (
-        "isa"
-        if risk_category == RiskCategory.UngroundedAttributes
-        else "xpia" if risk_category == RiskCategory.IndirectAttack else risk_category.value
-    )
+    if risk_category == RiskCategory.UngroundedAttributes:
+        return "isa"
+    else:
+        return risk_category.value

azure/ai/evaluation/red_team/_utils/strategy_utils.py CHANGED Viewed

@@ -68,6 +68,7 @@ def strategy_converter_map() -> Dict[Any, Union[PromptConverter, List[PromptConv
         AttackStrategy.Jailbreak: None,
         AttackStrategy.MultiTurn: None,
         AttackStrategy.Crescendo: None,
+        AttackStrategy.IndirectJailbreak: None,
     }
@@ -89,14 +90,11 @@ def get_converter_for_strategy(
 def get_chat_target(
     target: Union[PromptChatTarget, Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
-    prompt_to_context: Optional[Dict[str, str]] = None,
 ) -> PromptChatTarget:
     """Convert various target types to a PromptChatTarget.
     :param target: The target to convert
     :type target: Union[PromptChatTarget, Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
-    :param prompt_to_context: Optional mapping from prompt content to context
-    :type prompt_to_context: Optional[Dict[str, str]]
     :return: A PromptChatTarget instance
     :rtype: PromptChatTarget
     """
@@ -154,7 +152,7 @@ def get_chat_target(
             has_callback_signature = False
         if has_callback_signature:
-            chat_target = _CallbackChatTarget(callback=target, prompt_to_context=prompt_to_context)
+            chat_target = _CallbackChatTarget(callback=target)
         else:
             async def callback_target(
@@ -190,26 +188,6 @@ def get_chat_target(
                 messages_list.append(formatted_response)  # type: ignore
                 return {"messages": messages_list, "stream": stream, "session_state": session_state, "context": {}}
-            chat_target = _CallbackChatTarget(callback=callback_target, prompt_to_context=prompt_to_context)  # type: ignore
+            chat_target = _CallbackChatTarget(callback=callback_target)  # type: ignore
     return chat_target
-def get_orchestrators_for_attack_strategies(
-    attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
-) -> List[Callable]:
-    """
-    Gets a list of orchestrator functions to use based on the attack strategies.
-    :param attack_strategies: The list of attack strategies
-    :type attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
-    :return: A list of orchestrator functions
-    :rtype: List[Callable]
-    """
-    call_to_orchestrators = []
-    # Since we're just returning one orchestrator type for now, simplify the logic
-    # This can be expanded later if different orchestrators are needed for different strategies
-    return [
-        lambda chat_target, all_prompts, converter, strategy_name, risk_category: None
-    ]  # This will be replaced with the actual orchestrator function in the main class

azure/ai/evaluation/simulator/_adversarial_simulator.py CHANGED Viewed

@@ -14,7 +14,7 @@ from tqdm import tqdm
 from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject

azure/ai/evaluation/simulator/_conversation/__init__.py CHANGED Viewed

@@ -15,7 +15,7 @@ import jinja2
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
 from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from .._model_tools._template_handler import TemplateParameters
 from .constants import ConversationRole

azure/ai/evaluation/simulator/_conversation/_conversation.py CHANGED Viewed

@@ -11,7 +11,7 @@ from azure.ai.evaluation.simulator._constants import SupportedLanguages
 from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
 from ..._http_utils import AsyncHttpPipeline
 from . import ConversationBot, ConversationTurn
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:

azure/ai/evaluation/simulator/_direct_attack_simulator.py CHANGED Viewed

@@ -13,7 +13,7 @@ from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_oned
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator import AdversarialScenario
 from azure.ai.evaluation._model_configurations import AzureAIProject
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from azure.core.credentials import TokenCredential
 from ._adversarial_simulator import AdversarialSimulator

azure/ai/evaluation/simulator/_indirect_attack_simulator.py CHANGED Viewed

@@ -15,7 +15,7 @@ from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
 from azure.ai.evaluation._model_configurations import AzureAIProject
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from azure.core.credentials import TokenCredential
 from azure.ai.evaluation._constants import TokenScope

azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py CHANGED Viewed

@@ -12,7 +12,7 @@ from azure.ai.evaluation.simulator._model_tools import ManagedIdentityAPITokenMa
 from azure.ai.evaluation._common.raiclient import MachineLearningServicesClient
 from azure.ai.evaluation._constants import TokenScope
 from azure.ai.evaluation._common.utils import is_onedp_project
-from azure.ai.evaluation._common.onedp import AIProjectClient
+from azure.ai.evaluation._common.onedp import ProjectsClient as AIProjectClient
 from azure.ai.evaluation._common import EvaluationServiceOneDPClient
 from azure.ai.evaluation._user_agent import UserAgentSingleton
 import jwt
@@ -113,6 +113,8 @@ class GeneratedRAIClient:
         strategy: Optional[str] = None,
         language: str = "en",
         scan_session_id: Optional[str] = None,
+        target: Optional[str] = None,
+        client_id: Optional[str] = None,
     ) -> Dict:
         """Get attack objectives using the auto-generated operations.
@@ -128,18 +130,38 @@ class GeneratedRAIClient:
         :type language: str
         :param scan_session_id: Optional unique session ID for the scan
         :type scan_session_id: Optional[str]
+        :param target: Optional target type (model/agent)
+        :type target: Optional[str]
+        :param client_id: Optional client ID for ACA token authorization
+        :type client_id: Optional[str]
         :return: The attack objectives
         :rtype: Dict
         """
         try:
+            # Build headers dictionary
+            headers = {}
+            if scan_session_id:
+                headers["x-ms-client-request-id"] = scan_session_id
+            if client_id:
+                from azure.identity import DefaultAzureCredential
+                # Get token using the client_id for managed identity
+                managed_identity_credential = DefaultAzureCredential(
+                    managed_identity_client_id=client_id, exclude_interactive_browser_credential=True
+                )
+                token = managed_identity_credential.get_token(TokenScope.DEFAULT_AZURE_MANAGEMENT).token
+                headers["aml-aca-token"] = token
             # Send the request using the autogenerated client
             response = self._client.get_attack_objectives(
                 risk_types=[risk_type],
                 risk_category=risk_category,
                 lang=language,
                 strategy=strategy,
-                headers={"x-ms-client-request-id": scan_session_id},
+                target_type=target,
+                headers=headers,
             )
             return response
         except Exception as e:
@@ -195,4 +217,5 @@ class GeneratedRAIClient:
                 if (exp_time - current_time) >= 300:
                     return token
+        # Get token
         return credential.get_token(TokenScope.DEFAULT_AZURE_MANAGEMENT).token

azure/ai/evaluation/simulator/_model_tools/_identity_manager.py CHANGED Viewed

@@ -152,6 +152,7 @@ class ManagedIdentityAPITokenManager(APITokenManager):
         ):
             self.last_refresh_time = time.time()
             get_token_method = self.credential.get_token(self.token_scope.value)
             if inspect.isawaitable(get_token_method):
                 # If it's awaitable, await it
                 token_response: AccessToken = await get_token_method

azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py CHANGED Viewed

@@ -12,7 +12,7 @@ from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_cl
 from azure.ai.evaluation._user_agent import UserAgentSingleton
 from azure.core.exceptions import HttpResponseError, ServiceResponseError
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from azure.ai.evaluation._common.onedp.models import SimulationDTO
 from azure.ai.evaluation._common.constants import RAIService

azure/ai/evaluation/simulator/_model_tools/_template_handler.py CHANGED Viewed

@@ -7,7 +7,7 @@ from ast import literal_eval
 from typing_extensions import NotRequired
 from azure.ai.evaluation._model_configurations import AzureAIProject
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from azure.ai.evaluation.simulator._adversarial_scenario import AdversarialScenario
 from ._rai_client import RAIClient

azure/ai/evaluation/simulator/_model_tools/models.py CHANGED Viewed

@@ -12,7 +12,7 @@ from abc import ABC, abstractmethod
 from collections import deque
 from typing import Deque, Dict, List, Optional, Union
 from urllib.parse import urlparse
-from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 from ._rai_client import RAIClient
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException

{azure_ai_evaluation-1.12.0.dist-info → azure_ai_evaluation-1.13.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: azure-ai-evaluation
-Version: 1.12.0
+Version: 1.13.0
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -22,20 +22,25 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: NOTICE.txt
 Requires-Dist: pyjwt>=2.8.0
-Requires-Dist: azure-identity>=1.16.0
-Requires-Dist: azure-core>=1.30.2
+Requires-Dist: azure-identity>=1.19.0
+Requires-Dist: azure-core>=1.31.0
 Requires-Dist: nltk>=3.9.1
-Requires-Dist: azure-storage-blob>=12.10.0
-Requires-Dist: httpx>=0.25.1
-Requires-Dist: pandas<3.0.0,>=2.1.2
+Requires-Dist: azure-storage-blob>=12.19.0
+Requires-Dist: httpx>=0.27.2
+Requires-Dist: pandas<3.0.0,>=2.1.2; python_version < "3.13"
+Requires-Dist: pandas<3.0.0,>=2.2.3; python_version == "3.13"
+Requires-Dist: pandas<3.0.0,>=2.3.3; python_version >= "3.14"
 Requires-Dist: openai>=1.108.0
 Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
 Requires-Dist: msrest>=0.6.21
 Requires-Dist: Jinja2>=3.1.6
 Requires-Dist: aiohttp>=3.0
 Provides-Extra: redteam
-Requires-Dist: pyrit==0.8.1; extra == "redteam"
-Requires-Dist: duckdb==1.3.2; extra == "redteam"
+Requires-Dist: pyrit==0.8.1; python_version >= "3.10" and extra == "redteam"
+Requires-Dist: duckdb==1.3.2; python_version >= "3.10" and extra == "redteam"
+Provides-Extra: opentelemetry
+Requires-Dist: opentelemetry-sdk>=1.17.0; extra == "opentelemetry"
+Requires-Dist: azure-monitor-opentelemetry-exporter>=1.0.0b17; extra == "opentelemetry"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -413,6 +418,25 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 # Release History
+## 1.13.0 (2025-10-30)
+### Features Added
+- Updated `IndirectAttack` risk category for RedTeam to `IndirectJailbreak` to better reflect its purpose. This change allows users to apply cross-domain prompt injection (XPIA) attack strategies across all risk categories, enabling more comprehensive security testing of AI systems against indirect prompt injection attacks during red teaming.
+- Added `TaskAdherence`, `SensitiveDataLeakage`, and `ProhibitedActions` as cloud-only agent safety risk categories for red teaming.
+- Updated all evaluators' output to be of the following schema:
+  - `gpt_{evaluator_name}`, `{evaluator_name}`: float score,
+  - `{evaluator_name}_result`: pass/fail based on threshold,
+  - `{evaluator_name}_reason`, `{evaluator_name}_threshold`
+  - `{evaluator_name}_prompt_tokens`, `{evaluator_name}_completion_tokens`, `{evaluator_name}_total_tokens`, `{evaluator_name}_finish_reason`
+  - `{evaluator_name}_model`: model used for evaluation
+  - `{evaluator_name}_sample_input`, `{evaluator_name}_sample_output`: input and output used for evaluation
+  This change standardizes the output format across all evaluators and follows OTel convention.
+### Bugs Fixed
+- `image_tag` parameter in `AzureOpenAIPythonGrader` is now optional.
 ## 1.12.0 (2025-10-02)
@@ -423,6 +447,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 ### Bugs Fixed
 - Support for multi-level nesting in OpenAI grader (experimental)
+## 1.11.2 (2025-10-09)
+### Bugs Fixed
+- **kwargs in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
 ## 1.11.1 (2025-09-19)
 ### Bugs Fixed

azure-ai-evaluation 1.12.0__py3-none-any.whl → 1.13.0__py3-none-any.whl

azure-ai-evaluation 1.12.0__py3-none-any.whl → 1.13.0__py3-none-any.whl