azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (144):
  1. azure/ai/evaluation/__init__.py +10 -0
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +66 -0
  5. azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  6. azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  7. azure/ai/evaluation/_azure/_clients.py +4 -4
  8. azure/ai/evaluation/_azure/_envs.py +208 -0
  9. azure/ai/evaluation/_azure/_token_manager.py +12 -7
  10. azure/ai/evaluation/_common/__init__.py +7 -0
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +163 -0
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  13. azure/ai/evaluation/_common/onedp/_client.py +139 -0
  14. azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  15. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  17. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  18. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  20. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  26. azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  27. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  28. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  29. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  30. azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  31. azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  32. azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  33. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  35. azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  36. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  38. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  39. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  40. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  41. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  42. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  43. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  44. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  54. azure/ai/evaluation/_common/rai_service.py +165 -34
  55. azure/ai/evaluation/_common/raiclient/_version.py +1 -1
  56. azure/ai/evaluation/_common/utils.py +79 -1
  57. azure/ai/evaluation/_constants.py +16 -0
  58. azure/ai/evaluation/_converters/_ai_services.py +162 -118
  59. azure/ai/evaluation/_converters/_models.py +76 -6
  60. azure/ai/evaluation/_eval_mapping.py +73 -0
  61. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  62. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
  63. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
  64. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
  65. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  66. azure/ai/evaluation/_evaluate/_evaluate.py +325 -76
  67. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +553 -0
  68. azure/ai/evaluation/_evaluate/_utils.py +117 -4
  69. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
  70. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
  71. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
  72. azure/ai/evaluation/_evaluators/_common/_base_eval.py +12 -3
  73. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  74. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  75. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
  76. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
  77. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
  78. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
  79. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
  80. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  81. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +469 -0
  82. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
  83. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +11 -1
  84. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
  85. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -1
  86. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +16 -2
  87. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
  88. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
  89. azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
  90. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +11 -1
  91. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -2
  92. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
  93. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
  94. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
  95. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
  96. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +11 -1
  97. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +16 -2
  98. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +86 -12
  99. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
  100. azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
  101. azure/ai/evaluation/_exceptions.py +2 -0
  102. azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
  103. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  104. azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
  105. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  106. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
  107. azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
  108. azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  109. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  110. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
  111. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  112. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  113. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  114. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  115. azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  116. azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
  117. azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  118. azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  119. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +114 -22
  120. azure/ai/evaluation/_version.py +1 -1
  121. azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
  122. azure/ai/evaluation/red_team/_red_team.py +976 -546
  123. azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  124. azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
  125. azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  126. azure/ai/evaluation/simulator/_constants.py +1 -0
  127. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  128. azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  129. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  130. azure/ai/evaluation/simulator/_direct_attack_simulator.py +38 -25
  131. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  132. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +43 -28
  133. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  134. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +26 -18
  135. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  136. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  137. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +15 -10
  138. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  139. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +49 -3
  140. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +144 -86
  141. /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
  142. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
  143. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
  144. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0

azure/ai/evaluation/_legacy/prompty/_prompty.py
@@ -2,20 +2,27 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import asyncio
 import re
+
+from logging import Logger
 from os import PathLike
 from pathlib import Path
-from typing import Any, AsyncGenerator, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union, cast
+from typing import Any, AsyncGenerator, Awaitable, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union, cast
 
-from openai import AsyncAzureOpenAI, AsyncOpenAI, NotGiven
+from openai import AsyncAzureOpenAI, AsyncOpenAI, NotGiven, OpenAIError
+from openai.lib.azure import AsyncAzureADTokenProvider
+from azure.core.credentials import TokenCredential
+from azure.core.credentials_async import AsyncTokenCredential
 
 from azure.ai.evaluation._exceptions import ErrorTarget
-from azure.ai.evaluation._constants import DefaultOpenEncoding
+from azure.ai.evaluation._constants import DefaultOpenEncoding, TokenScope
 from azure.ai.evaluation._legacy.prompty._exceptions import (
     InvalidInputError,
     PromptyException,
     MissingRequiredInputError,
     NotSupportedError,
+    WrappedOpenAIError,
 )
 from azure.ai.evaluation._legacy.prompty._connection import AzureOpenAIConnection, Connection, OpenAIConnection
 from azure.ai.evaluation._legacy.prompty._yaml_utils import load_yaml_string
@@ -25,10 +32,14 @@ from azure.ai.evaluation._legacy.prompty._utils import (
     OpenAIChatResponseType,
     build_messages,
     format_llm_response,
+    openai_error_retryable,
     prepare_open_ai_request_params,
     resolve_references,
     update_dict_recursively,
 )
+from azure.ai.evaluation._constants import DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
+from azure.ai.evaluation._legacy._common._logging import get_logger
+from azure.ai.evaluation._legacy._common._async_token_provider import AsyncAzureTokenProvider
 
 
 PROMPTY_EXTENSION: Final[str] = ".prompty"
@@ -124,10 +135,24 @@ class AsyncPrompty:
     def __init__(
         self,
         path: Union[str, PathLike],
+        *,
+        logger: Optional[Logger] = None,
+        token_credential: Optional[Union[TokenCredential, AsyncTokenCredential]] = None,
+        is_reasoning_model: bool = False,
         **kwargs: Any,
     ):
         path = Path(path)
         configs, self._template = self._parse_prompty(path)
+
+        if is_reasoning_model:
+            parameters = configs.get("model", {}).get("parameters", {})
+            if "max_tokens" in parameters:
+                parameters.pop("max_tokens", None)
+                parameters["max_completion_tokens"] = DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
+            # Remove unsupported parameters for reasoning models
+            for key in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
+                parameters.pop(key, None)
+
         configs = resolve_references(configs, base_path=path.parent)
         configs = update_dict_recursively(configs, resolve_references(kwargs, base_path=path.parent))
 
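
The new `is_reasoning_model` flag rewrites the prompty's model parameters before references are resolved: `max_tokens` is swapped for `max_completion_tokens`, and the sampling parameters that reasoning models reject are dropped. A standalone sketch of the same rewrite, with an assumed cap standing in for `DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS`:

    # Sketch only; MAX_COMPLETION_TOKENS is an assumed stand-in value.
    MAX_COMPLETION_TOKENS = 60000

    def adapt_for_reasoning_model(parameters: dict) -> dict:
        params = dict(parameters)
        if "max_tokens" in params:
            params.pop("max_tokens", None)
            params["max_completion_tokens"] = MAX_COMPLETION_TOKENS
        # Reasoning models reject the classic sampling knobs
        for key in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
            params.pop(key, None)
        return params

    print(adapt_for_reasoning_model({"max_tokens": 800, "temperature": 0.0}))
    # {'max_completion_tokens': 60000}
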
@@ -142,6 +167,9 @@ class AsyncPrompty:
         self._inputs: Dict[str, Any] = configs.get("inputs", {})
         self._outputs: Dict[str, Any] = configs.get("outputs", {})
         self._name: str = configs.get("name", path.stem)
+        self._logger = logger or get_logger(__name__)
+        self._token_credential: Union[TokenCredential, AsyncTokenCredential] = \
+            token_credential or AsyncAzureTokenProvider()
 
     @property
     def path(self) -> Path:
@@ -234,9 +262,6 @@ class AsyncPrompty:
 
         return resolved_inputs
 
-    # TODO ralphe: error handling
-    # @trace
-    # @handle_openai_error()
     async def __call__(  # pylint: disable=docstring-keyword-should-match-keyword-only
         self,
         **kwargs: Any,
@@ -257,7 +282,7 @@ class AsyncPrompty:
         messages = build_messages(prompt=self._template, working_dir=self.path.parent, **inputs)
         params = prepare_open_ai_request_params(self._model, messages)
 
-        timeout: Union[NotGiven, float] = NotGiven()
+        timeout: Optional[float] = None
         if timeout_val := cast(Any, kwargs.get("timeout", None)):
             timeout = float(timeout_val)
 
@@ -273,6 +298,9 @@ class AsyncPrompty:
                 azure_deployment=connection.azure_deployment,
                 api_version=connection.api_version,
                 max_retries=max_retries,
+                azure_ad_token_provider=(self.get_token_provider(self._token_credential)
+                                         if not connection.api_key
+                                         else None),
             )
         elif isinstance(connection, OpenAIConnection):
             api_client = AsyncOpenAI(
@@ -286,8 +314,10 @@ class AsyncPrompty:
                 f"'{type(connection).__name__}' is not a supported connection type.", target=ErrorTarget.EVAL_RUN
             )
 
-        response: OpenAIChatResponseType = await api_client.with_options(timeout=timeout).chat.completions.create(
-            **params
+        response: OpenAIChatResponseType = await self._send_with_retries(
+            api_client=api_client,
+            params=params,
+            timeout=timeout,
         )
 
         return await format_llm_response(
@@ -311,3 +341,83 @@ class AsyncPrompty:
         inputs = self._resolve_inputs(kwargs)
         messages = build_messages(prompt=self._template, working_dir=self.path.parent, **inputs)
         return messages
+
+    async def _send_with_retries(
+        self,
+        api_client: Union[AsyncAzureOpenAI, AsyncOpenAI],
+        params: Mapping[str, Any],
+        timeout: Optional[float],
+        max_retries: int = 10,
+        max_entity_retries: int = 3,
+    ) -> OpenAIChatResponseType:
+        """Send the request with retries.
+
+        :param Union[AsyncAzureOpenAI, AsyncOpenAI] api_client: The OpenAI client.
+        :param Mapping[str, Any] params: The request parameters.
+        :param Optional[float] timeout: The timeout for the request.
+        :param int max_retries: The maximum number of retries.
+        :param int max_entity_retries: The maximum number of retries for entity errors.
+        :return: The response from OpenAI.
+        :rtype: OpenAIChatResponseType
+        """
+
+        client_name: str = api_client.__class__.__name__
+        client: Union[AsyncAzureOpenAI, AsyncOpenAI] = api_client.with_options(timeout=timeout or NotGiven())
+
+        entity_retries: List[int] = [0]
+        should_retry: bool = True
+        retry: int = 0
+        delay: Optional[float] = None
+
+        while should_retry:
+            try:
+                if delay:
+                    await asyncio.sleep(delay)
+
+                response = await client.chat.completions.create(**params)
+                return response
+            except OpenAIError as error:
+                if retry >= max_retries:
+                    should_retry = False
+                else:
+                    should_retry, delay = openai_error_retryable(error, retry, entity_retries, max_entity_retries)
+
+                if should_retry:
+                    self._logger.warning(
+                        "[%d/%d] %s request failed. %s: %s. Retrying in %f seconds.",
+                        retry,
+                        max_retries,
+                        client_name,
+                        type(error).__name__,
+                        str(error),
+                        delay or 0.0,
+                        exc_info=True,
+                    )
+                else:
+                    self._logger.exception(
+                        "[%d/%d] %s request failed. %s: %s",
+                        retry,
+                        max_retries,
+                        client_name,
+                        type(error).__name__,
+                        str(error),
+                    )
+                    raise WrappedOpenAIError(error=error) from error
+
+            retry += 1
+
+    @staticmethod
+    def get_token_provider(cred: Union[TokenCredential, AsyncTokenCredential]) -> AsyncAzureADTokenProvider:
+        """Get the token provider for the prompty.
+
+        :param Union[TokenCredential, AsyncTokenCredential] cred: The Azure authentication credential.
+        :return: An async token provider that yields a bearer token for the credential.
+        :rtype: AsyncAzureADTokenProvider
+        """
+        async def _wrapper() -> str:
+            token = cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT)
+            if isinstance(token, Awaitable):
+                token = await token
+            return token.token
+
+        return _wrapper
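
Taken together, the `AsyncPrompty` changes add keyword-only `logger`, `token_credential`, and `is_reasoning_model` arguments, route every chat-completion call through `_send_with_retries`, and fall back to an Entra ID token provider when the Azure connection has no API key. A hedged usage sketch of the new surface (the `.prompty` file and its inputs are hypothetical, and this is a private module shown for illustration):

    import asyncio
    from azure.identity.aio import DefaultAzureCredential
    from azure.ai.evaluation._legacy.prompty._prompty import AsyncPrompty

    async def main() -> None:
        prompty = AsyncPrompty(
            "coherence.prompty",  # hypothetical .prompty file
            token_credential=DefaultAzureCredential(),  # used only when no api_key is configured
        )
        # Input names depend on the prompty's declared inputs
        result = await prompty(query="What is the capital of France?", response="Paris")
        print(result)

    asyncio.run(main())
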

azure/ai/evaluation/_legacy/prompty/_utils.py
@@ -2,12 +2,15 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+# cspell:ignore apng, retriable
+
 import copy
-from dataclasses import dataclass, is_dataclass, fields
 import os
 import re
 import json
 import base64
+from dataclasses import dataclass, is_dataclass, fields
+from logging import Logger
 from pathlib import Path
 from typing import (
     Any,
@@ -30,6 +33,7 @@ from typing import (
 from jinja2 import Template
 from openai import AsyncStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
+from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAIError
 
 from azure.ai.evaluation._constants import DefaultOpenEncoding
 from azure.ai.evaluation._legacy.prompty._exceptions import (
@@ -217,7 +221,7 @@ DEFAULT_IMAGE_MIME_TYPE: Final[str] = "image/*"
 """The mime type to use when we don't know the image type"""
 
 FILE_EXT_TO_MIME: Final[Mapping[str, str]] = {
-    ".apng": "image/apng",  # cspell:ignore apng
+    ".apng": "image/apng",
     ".avif": "image/avif",
     ".bmp": "image/bmp",
     ".gif": "image/gif",
@@ -542,4 +546,70 @@ async def format_llm_response(
     return result
 
 
+def openai_error_retryable(
+    error: OpenAIError, retry: int, entity_retry: List[int], max_entity_retries: int
+) -> Tuple[bool, float]:
+    """
+    Determines if an OpenAI error is retryable, and determines the retry delay to use.
+
+    :param OpenAIError error: The error to handle
+    :param int retry: The current retry count (0 means we're on the first attempt and no retries have been made)
+    :param List[int] entity_retry: The current retry count for the unprocessable entity failures. This should be a
+        list containing only 1 element to mimic pass by reference semantics. A value of 0 means we're on the
+        first attempt and no retries have been made.
+    :param int max_entity_retries: The maximum number of retries to make for unprocessable entity failures
+    :return: A tuple containing whether the error is retryable and the delay to use
+    :rtype: Tuple[bool, float]
+    """
+
+    # Using https://platform.openai.com/docs/guides/error-codes/api-errors#python-library-error-types as a reference
+
+    should_retry: bool
+    delay: Optional[float] = None
+
+    if isinstance(error, APIConnectionError):
+        retriable_error_messages: Sequence[str] = [
+            "connection aborted",
+            # issue 2296
+            "server disconnected without sending a response",
+        ]
+        should_retry = (
+            isinstance(error, APITimeoutError)  # APITimeoutError is a subclass of APIConnectionError
+            or str(error).lower() in retriable_error_messages
+            or str(error.__cause__).lower() in retriable_error_messages
+        )
+    elif isinstance(error, APIStatusError):
+        status_code: int = error.response.status_code
+        if status_code == 422:
+            # As per the original legacy code, UnprocessableEntityError (HTTP 422) should be handled differently
+            # with a smaller retry count, as retrying more may not be beneficial.
+            should_retry = entity_retry[0] < max_entity_retries
+            entity_retry[0] += 1
+        elif status_code == 429:
+            # Two flavours: either you are throttled and should retry after a delay, or you have
+            # exceeded your quota and should not retry.
+            should_retry = (error.type or "").lower() != "insufficient_quota"
+        else:
+            should_retry = status_code >= 500
+
+        # Use what the service tells us to use for the delay if it's provided
+        if should_retry and not delay:
+            delay_str = error.response.headers.get("Retry-After", None)
+            if delay_str is not None:
+                delay = float(delay_str)
+    else:
+        should_retry = False
+
+    # Use exponential backoff for retries if the service doesn't provide a delay
+    if not delay:
+        delay = min(60, 2 + 2**retry)
+
+    return (should_retry, delay)
+
+
 # endregion
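
When the service does not send a `Retry-After` header, the fallback delay is `min(60, 2 + 2**retry)`, so the waits grow exponentially and clamp at 60 seconds. A quick check of that schedule:

    delays = [min(60, 2 + 2**retry) for retry in range(8)]
    print(delays)  # [3, 4, 6, 10, 18, 34, 60, 60]
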

azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py
@@ -6,9 +6,10 @@ from enum import Enum
 import os
 import inspect
 import logging
+import asyncio
 from datetime import datetime
 from azure.ai.evaluation._common._experimental import experimental
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Union, cast, Coroutine, TypeVar, Awaitable
 from azure.ai.evaluation._common.math import list_mean_nan_safe
 from azure.ai.evaluation._constants import CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT
 from azure.ai.evaluation._evaluators import (
@@ -20,6 +21,8 @@ from azure.ai.evaluation._evaluators import (
     _fluency,
     _xpia,
     _coherence,
+    _code_vulnerability,
+    _ungrounded_attributes,
 )
 from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
 from azure.ai.evaluation._evaluate import _evaluate
@@ -31,7 +34,7 @@ from azure.ai.evaluation.simulator import (
     AdversarialScenario,
     AdversarialScenarioJailbreak,
     IndirectAttackSimulator,
-    DirectAttackSimulator ,
+    DirectAttackSimulator,
 )
 from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
 from azure.ai.evaluation.simulator._utils import JsonLineList
@@ -71,6 +74,7 @@ class _SafetyEvaluator(Enum):
     """
 
     CONTENT_SAFETY = "content_safety"
+    CODE_VULNERABILITY = "code_vulnerability"
     GROUNDEDNESS = "groundedness"
     PROTECTED_MATERIAL = "protected_material"
     RELEVANCE = "relevance"
@@ -80,21 +84,22 @@ class _SafetyEvaluator(Enum):
     INDIRECT_ATTACK = "indirect_attack"
     DIRECT_ATTACK = "direct_attack"
     ECI = "eci"
+    UNGROUNDED_ATTRIBUTES = "ungrounded_attributes"
 
 
 @experimental
 class _SafetyEvaluation:
     def __init__(
         self,
-        azure_ai_project: dict,
+        azure_ai_project: Union[str, dict],
         credential: TokenCredential,
         model_config: Optional[Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]] = None,
     ):
         """
         Initializes a SafetyEvaluation object.
 
-        :param azure_ai_project: A dictionary defining the Azure AI project. Required keys are 'subscription_id', 'resource_group_name', and 'project_name'.
-        :type azure_ai_project: Dict[str, str]
+        :param azure_ai_project: A string or dictionary defining the Azure AI project. When a dictionary is provided, the required keys are 'subscription_id', 'resource_group_name', and 'project_name'.
+        :type azure_ai_project: Union[str, Dict[str, str]]
         :param credential: The credential for connecting to Azure AI project.
         :type credential: ~azure.core.credentials.TokenCredential
         :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration.
@@ -106,8 +111,7 @@ class _SafetyEvaluation:
             self.model_config = model_config
         else:
             self.model_config = None
-        validate_azure_ai_project(azure_ai_project)
-        self.azure_ai_project = AzureAIProject(**azure_ai_project)
+        self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
         self.credential = credential
         self.logger = _setup_logger()
 
@@ -157,6 +161,8 @@ class _SafetyEvaluation:
         adversarial_scenario: Optional[Union[AdversarialScenario, AdversarialScenarioJailbreak, _UnstableAdversarialScenario]] = None,
         source_text: Optional[str] = None,
         direct_attack: bool = False,
+        randomization_seed: Optional[int] = None,
+        concurrent_async_tasks: Optional[int] = 5,
     ) -> Dict[str, str]:
         """
         Generates synthetic conversations based on provided parameters.
@@ -192,10 +198,17 @@ class _SafetyEvaluation:
             context = latest_message.get("context", None)
             latest_context = None
             try:
+                is_async = self._is_async_function(target)
                 if self._check_target_returns_context(target):
-                    response, latest_context = target(query=application_input)
+                    if is_async:
+                        response, latest_context = await target(query=application_input)
+                    else:
+                        response, latest_context = target(query=application_input)
                 else:
-                    response = target(query=application_input)
+                    if is_async:
+                        response = await target(query=application_input)
+                    else:
+                        response = target(query=application_input)
             except Exception as e:
                 response = f"Something went wrong {e!s}"
@@ -234,6 +247,8 @@ class _SafetyEvaluation:
                 conversation_turns=conversation_turns,
                 text=source_text,
                 target=callback,
+                randomization_seed=randomization_seed,
+                concurrent_async_task=concurrent_async_tasks,
             )
 
         # if DirectAttack, run DirectAttackSimulator
@@ -247,6 +262,8 @@ class _SafetyEvaluation:
                 max_conversation_turns=max_conversation_turns,
                 max_simulation_results=max_simulation_results,
                 target=callback,
+                randomization_seed=randomization_seed,
+                concurrent_async_task=concurrent_async_tasks,
             )
             jailbreak_outputs = simulator_outputs["jailbreak"]
             simulator_outputs = simulator_outputs["regular"]
@@ -264,6 +281,7 @@ class _SafetyEvaluation:
                 num_queries=max_simulation_results,
                 target=callback,
                 text=source_text if source_text else "",
+                concurrent_async_tasks=concurrent_async_tasks,
             )
 
         ## Run AdversarialSimulator
@@ -279,6 +297,8 @@ class _SafetyEvaluation:
                 conversation_turns=conversation_turns,
                 target=callback,
                 text=source_text,
+                randomization_seed=randomization_seed,
+                concurrent_async_task=concurrent_async_tasks,
             )
 
         ## If no outputs are generated, raise an exception
@@ -372,6 +392,10 @@ class _SafetyEvaluation:
             )
         if evaluator == _SafetyEvaluator.ECI:
             return _UnstableAdversarialScenario.ECI
+        if evaluator == _SafetyEvaluator.CODE_VULNERABILITY:
+            return AdversarialScenario.ADVERSARIAL_CODE_VULNERABILITY
+        if evaluator == _SafetyEvaluator.UNGROUNDED_ATTRIBUTES:
+            return AdversarialScenario.ADVERSARIAL_UNGROUNDED_ATTRIBUTES
         if evaluator in [
             _SafetyEvaluator.GROUNDEDNESS,
             _SafetyEvaluator.RELEVANCE,
@@ -453,6 +477,14 @@ class _SafetyEvaluation:
             evaluators_dict["eci"] = ECIEvaluator(
                 azure_ai_project=self.azure_ai_project, credential=self.credential
             )
+        elif evaluator == _SafetyEvaluator.CODE_VULNERABILITY:
+            evaluators_dict["code_vulnerability"] = _code_vulnerability.CodeVulnerabilityEvaluator(
+                azure_ai_project=self.azure_ai_project, credential=self.credential
+            )
+        elif evaluator == _SafetyEvaluator.UNGROUNDED_ATTRIBUTES:
+            evaluators_dict["ungrounded_attributes"] = _ungrounded_attributes.UngroundedAttributesEvaluator(
+                azure_ai_project=self.azure_ai_project, credential=self.credential
+            )
         else:
             msg = (
                 f"Invalid evaluator: {evaluator}. Supported evaluators are: {_SafetyEvaluator.__members__.values()}"
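
The two new enum members are wired through both scenario selection and evaluator construction, so they are requested like any other safety evaluator. A hedged sketch of opting in, reusing the async `my_target` sketched earlier (project and credential values are placeholders, and `_SafetyEvaluation` is a private, experimental API):

    import asyncio
    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation._safety_evaluation._safety_evaluation import (
        _SafetyEvaluation,
        _SafetyEvaluator,
    )

    async def main() -> None:
        safety_eval = _SafetyEvaluation(
            azure_ai_project={
                "subscription_id": "<subscription-id>",
                "resource_group_name": "<resource-group>",
                "project_name": "<project-name>",
            },
            credential=DefaultAzureCredential(),
        )
        results = await safety_eval(
            target=my_target,  # sync or async callable accepting query=...
            evaluators=[_SafetyEvaluator.CODE_VULNERABILITY, _SafetyEvaluator.UNGROUNDED_ATTRIBUTES],
            num_turns=1,  # both new evaluators support single-turn conversations only
        )
        print(results)

    asyncio.run(main())
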
@@ -465,7 +497,7 @@ class _SafetyEvaluation:
                 blame=ErrorBlame.USER_ERROR,
             )
         return evaluators_dict
-
+
     @staticmethod
     def _check_target_returns_context(target: Callable) -> bool:
         """
@@ -478,6 +510,15 @@ class _SafetyEvaluation:
         ret_type = sig.return_annotation
         if ret_type == inspect.Signature.empty:
             return False
+
+        # Check for Coroutine/Awaitable return types for async functions
+        origin = getattr(ret_type, "__origin__", None)
+        if origin is not None and (origin is Coroutine or origin is Awaitable):
+            args = getattr(ret_type, "__args__", None)
+            if args and len(args) > 0:
+                # For async functions, check the actual return type inside the Coroutine
+                ret_type = args[-1]
+
         if ret_type is tuple:
             return True
         return False
@@ -494,13 +535,33 @@ class _SafetyEvaluation:
         ret_type = sig.return_annotation
         if ret_type == inspect.Signature.empty:
             return False
+
+        # Check for Coroutine/Awaitable return types for async functions
+        origin = getattr(ret_type, "__origin__", None)
+        if origin is not None and (origin is Coroutine or origin is Awaitable):
+            args = getattr(ret_type, "__args__", None)
+            if args and len(args) > 0:
+                # For async functions, check the actual return type inside the Coroutine
+                ret_type = args[-1]
+
         if ret_type is str:
             return True
         return False
 
-
     @staticmethod
-    def _check_target_is_callback(target:Callable) -> bool:
+    def _is_async_function(target: Callable) -> bool:
+        """
+        Checks if the target function is an async function.
+
+        :param target: The target function to check.
+        :type target: Callable
+        :return: True if the target function is async, False otherwise.
+        :rtype: bool
+        """
+        return asyncio.iscoroutinefunction(target)
+
+    @staticmethod
+    def _check_target_is_callback(target: Callable) -> bool:
         sig = inspect.signature(target)
         param_names = list(sig.parameters.keys())
         return 'messages' in param_names and 'stream' in param_names and 'session_state' in param_names and 'context' in param_names
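
`_is_async_function` delegates to `asyncio.iscoroutinefunction`, which is what distinguishes the two call paths in the simulation callback:

    import asyncio

    async def async_target(query: str) -> str:
        return query

    def sync_target(query: str) -> str:
        return query

    print(asyncio.iscoroutinefunction(async_target))  # True
    print(asyncio.iscoroutinefunction(sync_target))   # False
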
@@ -560,7 +621,28 @@ class _SafetyEvaluation:
                 category=ErrorCategory.INVALID_VALUE,
                 blame=ErrorBlame.USER_ERROR,
             )
-
+
+        if _SafetyEvaluator.CODE_VULNERABILITY in evaluators and num_turns > 1:
+            self.logger.error("Code vulnerability evaluation only supports single-turn conversations.")
+            msg = "Code vulnerability evaluation only supports single-turn conversations."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.UNKNOWN,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
+        if _SafetyEvaluator.UNGROUNDED_ATTRIBUTES in evaluators and num_turns > 1:
+            self.logger.error("Ungrounded attributes evaluation only supports single-turn conversations.")
+            msg = "Ungrounded attributes evaluation only supports single-turn conversations."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.UNKNOWN,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            )
+
         if _SafetyEvaluator.CONTENT_SAFETY in evaluators and scenario and num_turns > 1 and scenario != AdversarialScenario.ADVERSARIAL_CONVERSATION:
             self.logger.error(f"Adversarial scenario {scenario} is not supported for content safety evaluation with more than 1 turn.")
             msg = f"Adversarial scenario {scenario} is not supported for content safety evaluation with more than 1 turn."
@@ -604,7 +686,9 @@ class _SafetyEvaluation:
         violence_defects = []
         sexual_defects = []
         self_harm_defects = []
-        for i, row in enumerate(zip(evaluation_result_dict[jailbreak_cols[0]]['rows'], evaluation_result_dict[regular_cols[0]]['rows'])):
+        jailbreak_rows = sorted(evaluation_result_dict[jailbreak_cols[0]]['rows'], key=lambda x: (x.get('inputs.category') is None, x.get('inputs.category', '')))
+        regular_rows = sorted(evaluation_result_dict[regular_cols[0]]['rows'], key=lambda x: (x.get('inputs.category') is None, x.get('inputs.category', '')))
+        for i, row in enumerate(zip(jailbreak_rows, regular_rows)):
             hate_unfairness_defects.append(1 if (row[0]['outputs.content_safety.hate_unfairness_score'] > CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT and not row[1]['outputs.content_safety.hate_unfairness_score'] > CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT) else 0)
             violence_defects.append(1 if (row[0]['outputs.content_safety.violence_score'] > CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT and not row[1]['outputs.content_safety.violence_score'] > CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT) else 0)
             sexual_defects.append(1 if (row[0]['outputs.content_safety.sexual_score'] > CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT and not row[1]['outputs.content_safety.sexual_score'] > CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT) else 0)
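
Sorting both row sets on `inputs.category` before zipping keeps the jailbreak and regular rows aligned by category, with rows that lack a category ordered last (the tuple key sorts on the `is None` check first). For example:

    rows = [{"inputs.category": "violence"}, {}, {"inputs.category": "hate"}]
    rows.sort(key=lambda x: (x.get("inputs.category") is None, x.get("inputs.category", "")))
    print([r.get("inputs.category") for r in rows])  # ['hate', 'violence', None]
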
@@ -630,7 +714,7 @@ class _SafetyEvaluation:
 
     async def __call__(
         self,
-        target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        target: Union[Callable, Awaitable[Any], AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
         evaluators: List[_SafetyEvaluator] = [],
         evaluation_name: Optional[str] = None,
         num_turns: int = 1,
@@ -643,13 +727,15 @@ class _SafetyEvaluation:
         data_path: Optional[Union[str, os.PathLike]] = None,
         jailbreak_data_path: Optional[Union[str, os.PathLike]] = None,
         output_path: Optional[Union[str, os.PathLike]] = None,
-        data_paths: Optional[Union[Dict[str, str], Dict[str, Union[str, os.PathLike]]]] = None
+        data_paths: Optional[Union[Dict[str, str], Dict[str, Union[str, os.PathLike]]]] = None,
+        randomization_seed: Optional[int] = None,
+        concurrent_async_tasks: Optional[int] = 5,
     ) -> Union[Dict[str, EvaluationResult], Dict[str, str], Dict[str, Union[str, os.PathLike]]]:
         '''
         Evaluates the target function based on the provided parameters.
 
-        :param target: The target function to call during the evaluation.
-        :type target: Callable
+        :param target: The target function to call during the evaluation. This can be a synchronous or an asynchronous function.
+        :type target: Union[Callable, Awaitable[Any], AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
         :param evaluators: A list of SafetyEvaluator.
         :type evaluators: List[_SafetyEvaluator]
         :param evaluation_name: The display name of the evaluation.
@@ -671,12 +757,17 @@ class _SafetyEvaluation:
         :param data_path: The path to the data file generated by the Simulator. If None, the Simulator will be run.
         :type data_path: Optional[Union[str, os.PathLike]]
         :param jailbreak_data_path: The path to the data file generated by the Simulator for the jailbreak scenario. If None, the DirectAttackSimulator will be run.
         :type jailbreak_data_path: Optional[Union[str, os.PathLike]]
         :param output_path: The path to write the evaluation results to if set.
         :type output_path: Optional[Union[str, os.PathLike]]
+        :param data_paths: A dictionary of data paths to evaluate. If None, the Simulator will be run.
+        :type data_paths: Optional[Union[Dict[str, str], Dict[str, Union[str, os.PathLike]]]]
+        :param randomization_seed: The seed used to randomize prompt selection. If unset, the system's default seed is used.
+        :type randomization_seed: Optional[int]
+        :param concurrent_async_tasks: The number of concurrent async tasks to run. If None, the system's default is used.
+        :type concurrent_async_tasks: Optional[int]
         '''
         ## Log inputs
-        self.logger.info(f"User inputs: evaluators={evaluators}, evaluation_name={evaluation_name}, num_turns={num_turns}, num_rows={num_rows}, scenario={scenario}, conversation_turns={conversation_turns}, tasks={tasks}, source_text={source_text}, data_path={data_path}, jailbreak_data_path={jailbreak_data_path}, output_path={output_path}")
+        self.logger.info(f"User inputs: evaluators={evaluators}, evaluation_name={evaluation_name}, num_turns={num_turns}, num_rows={num_rows}, scenario={scenario}, conversation_turns={conversation_turns}, tasks={tasks}, source_text={source_text}, data_path={data_path}, jailbreak_data_path={jailbreak_data_path}, output_path={output_path}, randomization_seed={randomization_seed}, concurrent_async_tasks={concurrent_async_tasks}")
 
         ## Validate arguments
         self._validate_inputs(
@@ -706,6 +797,7 @@ class _SafetyEvaluation:
                 tasks=tasks,
                 source_text=source_text,
                 direct_attack=_SafetyEvaluator.DIRECT_ATTACK in evaluators,
+                randomization_seed=randomization_seed,
             )
         elif data_path:
             data_paths = {Path(data_path).stem: data_path}

azure/ai/evaluation/_version.py
@@ -3,4 +3,4 @@
 # ---------------------------------------------------------
 # represents upcoming version
 
-VERSION = "1.5.0"
+VERSION = "1.7.0"

azure/ai/evaluation/red_team/_attack_strategy.py
@@ -42,4 +42,4 @@ class AttackStrategy(Enum):
                 raise ValueError("All items must be instances of AttackStrategy")
         if len(items) > 2:
             raise ValueError("Composed strategies must have at most 2 items")
-        return items
+        return items