PyPI - azure-ai-evaluation - Versions diffs - 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.8.0-py3-none-any.whl → 1.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (142)

azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py CHANGED Viewed

@@ -8,27 +8,28 @@ from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 @experimental
 class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
     """
-    Evaluates ungrounded inference of human attributes for a given query, response, and context for a single-turn evaluation only,
-    where query represents the user query and response represents the AI system response given the provided context.
-    Ungrounded Attributes checks for whether a response is first, ungrounded, and checks if it contains information about protected class or
+    Evaluates ungrounded inference of human attributes for a given query, response, and context for a single-turn evaluation only,
+    where query represents the user query and response represents the AI system response given the provided context.
+    Ungrounded Attributes checks for whether a response is first, ungrounded, and checks if it contains information about protected class or
     emotional state of a person.
     It identifies the following attributes:
     - emotional_state
     - protected_class
     - groundedness
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project.
-        It contains subscription id, resource group, and project name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
     :param kwargs: Additional arguments to pass to the evaluator.
     :type kwargs: Any
@@ -42,13 +43,13 @@ class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
             :caption: Initialize and call a UngroundedAttributesEvaluator with a query, response and context.
     .. admonition:: Example using Azure AI Project URL:
         .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
             :start-after: [START ungrounded_attributes_evaluator]
             :end-before: [END ungrounded_attributes_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call UngroundedAttributesEvaluator using Azure AI Project URL in the following format
+            :caption: Initialize and call UngroundedAttributesEvaluator using Azure AI Project URL in the following format
                 https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
     .. note::
@@ -57,19 +58,26 @@ class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
         for the ungrounded attributes will be "ungrounded_attributes_label".
     """
-    id = "ungrounded_attributes"
+    id = "azureai://built-in/evaluators/ungrounded_attributes"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        **kwargs,
     ):
+        # Set default for evaluate_query if not provided
+        if "evaluate_query" not in kwargs:
+            kwargs["evaluate_query"] = True
         super().__init__(
             eval_metric=EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            **kwargs,
         )
     @overload
@@ -109,5 +117,5 @@ class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
         :return: The ungrounded attributes label.
         :rtype: Dict[str, Union[str, bool]]
         """
         return super().__call__(*args, **kwargs)

azure/ai/evaluation/_evaluators/_xpia/xpia.py CHANGED Viewed

@@ -40,9 +40,9 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
     :param credential: The credential for connecting to Azure AI project. Required
     :type credential: ~azure.core.credentials.TokenCredential
-    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
-        name.
-    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
+    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
     :param threshold: The threshold for the IndirectAttack evaluator. Default is 0.
     :type threshold: int
@@ -54,32 +54,35 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
             :language: python
             :dedent: 8
             :caption: Initialize and call an IndirectAttackEvaluator.
     .. admonition:: Example using Azure AI Project URL:
         .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
             :start-after: [START indirect_attack_evaluator]
             :end-before: [END indirect_attack_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call IndirectAttackEvaluator using Azure AI Project URL in the following format
+            :caption: Initialize and call IndirectAttackEvaluator using Azure AI Project URL in the following format
                 https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
     """
-    id = "azureml://registries/azureml/models/Indirect-Attack-Evaluator/versions/3"
+    id = "azureai://built-in/evaluators/indirect_attack"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+    _OPTIONAL_PARAMS = ["query"]
     @override
     def __init__(
         self,
         credential,
         azure_ai_project,
+        **kwargs,
     ):
         super().__init__(
             eval_metric=EvaluationMetrics.XPIA,
             azure_ai_project=azure_ai_project,
             credential=credential,
+            **kwargs,
         )
     @overload

azure/ai/evaluation/_exceptions.py CHANGED Viewed

@@ -9,6 +9,15 @@ from typing import Optional
 from azure.core.exceptions import AzureError
+class ErrorMessage(Enum):
+    """Error messages to be used when raising EvaluationException.
+    These messages are used to provide a consistent error message format across the SDK.
+    """
+    MALFORMED_CONVERSATION_HISTORY = "Malformed Conversation History: Query parameter representing conversation history should have exactly one more user query than agent responses"
 class ErrorCategory(Enum):
     """Error category to be specified when using EvaluationException class.
@@ -87,6 +96,7 @@ class ErrorTarget(Enum):
     TOOL_CALL_ACCURACY_EVALUATOR = "ToolCallAccuracyEvaluator"
     RED_TEAM = "RedTeam"
     AOAI_GRADER = "AoaiGrader"
+    CONVERSATION_HISTORY_PARSING = "_get_conversation_history"
 class EvaluationException(AzureError):

azure/ai/evaluation/_http_utils.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import Any, Dict, MutableMapping, Optional, TypedDict, cast
 from typing_extensions import Self, Unpack
-from azure.ai.evaluation._user_agent import USER_AGENT
+from azure.ai.evaluation._user_agent import UserAgentSingleton
 from azure.core.configuration import Configuration
 from azure.core.pipeline import AsyncPipeline, Pipeline
 from azure.core.pipeline.policies import (
@@ -454,7 +454,7 @@ def get_http_client(**kwargs: Any) -> HttpPipeline:
     :returns: An HttpPipeline with a set of applied policies:
     :rtype: HttpPipeline
     """
-    kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=USER_AGENT))
+    kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=UserAgentSingleton().value))
     return HttpPipeline(**kwargs)
@@ -464,5 +464,5 @@ def get_async_http_client(**kwargs: Any) -> AsyncHttpPipeline:
     :returns: An AsyncHttpPipeline with a set of applied policies:
     :rtype: AsyncHttpPipeline
     """
-    kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=USER_AGENT))
+    kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=UserAgentSingleton().value))
     return AsyncHttpPipeline(**kwargs)

azure/ai/evaluation/_legacy/_batch_engine/_config.py CHANGED Viewed

@@ -19,7 +19,7 @@ class BatchEngineConfig:
     batch_timeout_seconds: int = PF_BATCH_TIMEOUT_SEC_DEFAULT
     """The maximum amount of time to wait for all evaluations in the batch to complete."""
-    run_timeout_seconds: int = 600
+    line_timeout_seconds: int = 600
     """The maximum amount of time to wait for an evaluation to run against a single entry
     in the data input to complete."""
@@ -32,13 +32,16 @@ class BatchEngineConfig:
     default_num_results: int = 100
     """The default number of results to return if you don't ask for all results."""
+    raise_on_error: bool = True
+    """Whether to raise an error if an evaluation fails."""
     def __post_init__(self):
         if self.logger is None:
             raise ValueError("logger cannot be None")
         if self.batch_timeout_seconds <= 0:
             raise ValueError("batch_timeout_seconds must be greater than 0")
-        if self.run_timeout_seconds <= 0:
-            raise ValueError("run_timeout_seconds must be greater than 0")
+        if self.line_timeout_seconds <= 0:
+            raise ValueError("line_timeout_seconds must be greater than 0")
         if self.max_concurrency <= 0:
             raise ValueError("max_concurrency must be greater than 0")
         if self.default_num_results <= 0:

azure/ai/evaluation/_legacy/_batch_engine/_engine.py CHANGED Viewed

@@ -20,15 +20,31 @@ from concurrent.futures import Executor
 from functools import partial
 from contextlib import contextmanager
 from datetime import datetime, timezone
-from typing import Any, Callable, Dict, Final, Generator, Mapping, MutableMapping, Optional, Sequence, Set, Tuple, cast
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Final,
+    Generator,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    cast,
+    Literal,
+)
 from uuid import uuid4
+from ._config import BatchEngineConfig
 from ._utils import DEFAULTS_KEY, get_int_env_var, get_value_from_path, is_async_callable
 from ._status import BatchStatus
 from ._result import BatchResult, BatchRunDetails, BatchRunError, TokenMetrics
 from ._run_storage import AbstractRunStorage, NoOpRunStorage
-from .._common._logging import log_progress, NodeLogManager
-from ..._exceptions import ErrorBlame
+from .._common._logging import log_progress, logger, NodeLogManager
+from ..._exceptions import ErrorBlame, EvaluationException
 from ._exceptions import (
     BatchEngineCanceledError,
     BatchEngineError,
@@ -54,30 +70,25 @@ class BatchEngine:
         self,
         func: Callable,
         *,
+        config: BatchEngineConfig,
         storage: Optional[AbstractRunStorage] = None,
-        batch_timeout_sec: Optional[int] = None,
-        line_timeout_sec: Optional[int] = None,
-        max_worker_count: Optional[int] = None,
         executor: Optional[Executor] = None,
     ):
         """Create a new batch engine instance
         :param Callable func: The function to run the flow
+        :param BatchEngineConfig config: The configuration for the batch engine
         :param Optional[AbstractRunStorage] storage: The storage to store execution results
-        :param Optional[int] batch_timeout_sec: The timeout of batch run in seconds
-        :param Optional[int] line_timeout_sec: The timeout of each line in seconds
-        :param Optional[int] max_worker_count: The concurrency limit of batch run
         :param Optional[Executor] executor: The executor to run the flow (if needed)
         """
         self._func: Callable = func
+        self._config: BatchEngineConfig = config
         self._storage: AbstractRunStorage = storage or NoOpRunStorage()
-        # TODO ralphe: Consume these from the batch context/config instead of from
-        #              kwargs or (even worse) environment variables
-        self._batch_timeout_sec = batch_timeout_sec or get_int_env_var("PF_BATCH_TIMEOUT_SEC")
-        self._line_timeout_sec = line_timeout_sec or get_int_env_var("PF_LINE_TIMEOUT_SEC", 600)
-        self._max_worker_count = max_worker_count or get_int_env_var("PF_WORKER_COUNT") or MAX_WORKER_COUNT
+        self._batch_timeout_sec = self._config.batch_timeout_seconds
+        self._line_timeout_sec = self._config.line_timeout_seconds
+        self._max_worker_count = self._config.max_concurrency
         self._executor: Optional[Executor] = executor
         self._is_canceled: bool = False
@@ -85,15 +96,13 @@ class BatchEngine:
     async def run(
         self,
         data: Sequence[Mapping[str, Any]],
-        column_mapping: Mapping[str, str],
+        column_mapping: Optional[Mapping[str, str]],
         *,
         id: Optional[str] = None,
         max_lines: Optional[int] = None,
     ) -> BatchResult:
         if not data:
             raise BatchEngineValidationError("Please provide a non-empty data mapping.")
-        if not column_mapping:
-            raise BatchEngineValidationError("The column mapping is required.")
         start_time = datetime.now(timezone.utc)
@@ -105,6 +114,8 @@ class BatchEngine:
             id = id or str(uuid4())
             result: BatchResult = await self._exec_in_task(id, batch_inputs, start_time)
             return result
+        except EvaluationException:
+            raise
         except Exception as ex:
             raise BatchEngineError(
                 "Unexpected error while running the batch run.", blame=ErrorBlame.SYSTEM_ERROR
@@ -114,20 +125,58 @@ class BatchEngine:
         # TODO ralphe: Make sure this works
         self._is_canceled = True
-    @staticmethod
     def _apply_column_mapping(
+        self,
         data: Sequence[Mapping[str, Any]],
-        column_mapping: Mapping[str, str],
+        column_mapping: Optional[Mapping[str, str]],
         max_lines: Optional[int],
     ) -> Sequence[Mapping[str, str]]:
+        resolved_column_mapping: Mapping[str, str] = self._resolve_column_mapping(column_mapping)
+        resolved_column_mapping.update(self._generate_defaults_for_column_mapping())
+        return self._apply_column_mapping_to_lines(data, resolved_column_mapping, max_lines)
+    def _resolve_column_mapping(
+        self,
+        column_mapping: Optional[Mapping[str, str]],
+    ) -> Mapping[str, str]:
+        parameters = inspect.signature(self._func).parameters
+        default_column_mapping: Dict[str, str] = {
+            name: f"${{data.{name}}}"
+            for name, value in parameters.items()
+            if name not in ["self", "cls", "args", "kwargs"]
+        }
+        resolved_mapping: Dict[str, str] = default_column_mapping.copy()
+        for name, value in parameters.items():
+            if value and value.default is not inspect.Parameter.empty:
+                resolved_mapping.pop(name)
+        resolved_mapping.update(column_mapping or {})
+        return resolved_mapping
+    def _generate_defaults_for_column_mapping(self) -> Mapping[Literal["$defaults$"], Any]:
+        return {
+            DEFAULTS_KEY: {
+                name: value.default
+                for name, value in inspect.signature(self._func).parameters.items()
+                if value.default is not inspect.Parameter.empty
+            }
+        }
+    @staticmethod
+    def _apply_column_mapping_to_lines(
+        data: Sequence[Mapping[str, Any]],
+        column_mapping: Mapping[str, str],
+        max_lines: Optional[int],
+    ) -> Sequence[Mapping[str, Any]]:
         data = data[:max_lines] if max_lines else data
         inputs: Sequence[Mapping[str, Any]] = []
-        line: int = 0
         defaults = cast(Mapping[str, Any], column_mapping.get(DEFAULTS_KEY, {}))
-        for input in data:
-            line += 1
+        for line_number, input in enumerate(data, start=1):
             mapped: Dict[str, Any] = {}
             missing_inputs: Set[str] = set()
@@ -148,18 +197,18 @@ class BatchEngine:
                     continue
                 dict_path = match.group(1)
-                found, value = get_value_from_path(dict_path, input)
+                found, mapped_value = get_value_from_path(dict_path, input)
                 if not found:  # try default value
-                    found, value = get_value_from_path(dict_path, defaults)
+                    found, mapped_value = get_value_from_path(dict_path, defaults)
                 if found:
-                    mapped[key] = value
+                    mapped[key] = mapped_value
                 else:
                     missing_inputs.add(dict_path)
             if missing_inputs:
                 missing = ", ".join(missing_inputs)
-                raise BatchEngineValidationError(f"Missing inputs for line {line}: '{missing}'")
+                raise BatchEngineValidationError(f"Missing inputs for line {line_number}: '{missing}'")
             inputs.append(mapped)
@@ -212,10 +261,12 @@ class BatchEngine:
                     end_time=None,
                     tokens=TokenMetrics(0, 0, 0),
                     error=BatchRunError("The line run is not completed.", None),
+                    index=i,
                 )
             )
             for i in range(len(batch_inputs))
         ]
+        self.handle_line_failures(result_details)
         for line_result in result_details:
             # Indicate the worst status of the batch run. This works because
@@ -229,9 +280,15 @@ class BatchEngine:
                 metrics.total_tokens += line_result.tokens.total_tokens
         if failed_lines and not error:
-            error = BatchEngineRunFailedError(
-                str(floor(failed_lines / len(batch_inputs) * 100)) + f"% of the batch run failed."
+            error_message = f"{floor(failed_lines / len(batch_inputs) * 100)}% of the batch run failed."
+            first_exception: Optional[Exception] = next(
+                (result.error.exception for result in result_details if result.error and result.error.exception),
+                None,
             )
+            if first_exception is not None:
+                error_message += f" {first_exception}"
+            error = BatchEngineRunFailedError(error_message)
         return BatchResult(
             status=status,
@@ -283,6 +340,13 @@ class BatchEngine:
                 # TODO ralphe: set logger to use here
             )
+    def __preprocess_inputs(self, inputs: Mapping[str, Any]) -> Mapping[str, Any]:
+        func_params = inspect.signature(self._func).parameters
+        filtered_params = {key: value for key, value in inputs.items() if key in func_params}
+        return filtered_params
     async def _exec_line_async(
         self,
         run_id: str,
@@ -298,6 +362,7 @@ class BatchEngine:
                 end_time=None,
                 tokens=TokenMetrics(0, 0, 0),
                 error=None,
+                index=index,
             )
             try:
@@ -313,15 +378,17 @@ class BatchEngine:
                     #       For now we will just run the function in the current process, but in the future we may
                     #       want to consider running the function in a separate process for isolation reasons.
                     output: Any
+                    processed_inputs = self.__preprocess_inputs(inputs)
                     if is_async_callable(self._func):
-                        output = await self._func(**inputs)
+                        output = await self._func(**processed_inputs)
                     else:
                         # to maximize the parallelism, we run the synchronous function in a separate thread
                         # and await its result
                         output = await asyncio.get_event_loop().run_in_executor(
-                            self._executor,
-                            partial(self._func, **inputs))
+                            self._executor, partial(self._func, **processed_inputs)
+                        )
                     # This should in theory never happen but as an extra precaution, let's check if the output
                     # is awaitable and await it if it is.
                     if inspect.isawaitable(output):
@@ -340,6 +407,24 @@ class BatchEngine:
         return index, details
+    @staticmethod
+    def handle_line_failures(run_infos: List[BatchRunDetails], raise_on_line_failure: bool = False):
+        """Handle line failures in batch run"""
+        failed_run_infos: List[BatchRunDetails] = [r for r in run_infos if r.status == BatchStatus.Failed]
+        failed_msg: Optional[str] = None
+        if len(failed_run_infos) > 0:
+            failed_indexes = ",".join([str(r.index) for r in failed_run_infos])
+            first_fail_exception: str = failed_run_infos[0].error.details
+            if raise_on_line_failure:
+                failed_msg = "Flow run failed due to the error: " + first_fail_exception
+                raise Exception(failed_msg)
+            failed_msg = (
+                f"{len(failed_run_infos)}/{len(run_infos)} flow run failed, indexes: [{failed_indexes}],"
+                f" exception of index {failed_run_infos[0].index}: {first_fail_exception}"
+            )
+            logger.error(failed_msg)
     def _persist_run_info(self, line_results: Sequence[BatchRunDetails]):
         # TODO ralphe: implement?
         pass

azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py CHANGED Viewed

@@ -90,7 +90,9 @@ def _openai_api_list() -> Generator[Tuple[Any, Callable, bool], None, None]:
         except ImportError:
             raise MissingRequiredPackage("Please install the 'openai' package to use the Azure AI Evaluation SDK")
         except AttributeError:
-            logging.warning("The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name)
+            logging.warning(
+                "The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name
+            )
 def inject_openai_api():
@@ -117,6 +119,7 @@ def recover_openai_api():
 class CaptureOpenAITokenUsage:
     """Context manager to capture OpenAI token usage."""
     def __init__(self):
         self._tokens = TokenMetrics(0, 0, 0)
@@ -126,4 +129,4 @@ class CaptureOpenAITokenUsage:
     def __exit__(self, exc_type: Optional[Exception], exc_value: Optional[Exception], traceback: Optional[Any]) -> None:
         captured_metrics = _token_metrics.get()
-        self._tokens.update(captured_metrics)
+        self._tokens.update(captured_metrics)

azure/ai/evaluation/_legacy/_batch_engine/_result.py CHANGED Viewed

@@ -55,6 +55,8 @@ class BatchRunDetails:
     """The token metrics of the line run."""
     error: Optional[BatchRunError]
     """The error of the line run. This will only be set if the status is Failed."""
+    index: int
+    """The line run index."""
     @property
     def duration(self) -> timedelta:

azure/ai/evaluation/_legacy/_batch_engine/_run.py CHANGED Viewed

@@ -58,7 +58,7 @@ class Run:
         dynamic_callable: Callable,
         name_prefix: Optional[str],
         inputs: Sequence[Mapping[str, Any]],
-        column_mapping: Mapping[str, str],
+        column_mapping: Optional[Mapping[str, str]] = None,
         created_on: Optional[datetime] = None,
         run: Optional["Run"] = None,
     ):
@@ -70,7 +70,7 @@ class Run:
         self.dynamic_callable = dynamic_callable
         self.name = self._generate_run_name(name_prefix, self._created_on)
         self.inputs = inputs
-        self.column_mapping = column_mapping
+        self.column_mapping: Optional[Mapping[str, str]] = column_mapping
         self.result: Optional[BatchResult] = None
         self.metrics: Mapping[str, Any] = {}
         self._run = run

azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.8.0-py3-none-any.whl → 1.10.0-py3-none-any.whl