azure-ai-evaluation 1.0.0b3__py3-none-any.whl → 1.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +1 -1
- azure/ai/evaluation/_common/constants.py +4 -2
- azure/ai/evaluation/_common/math.py +18 -0
- azure/ai/evaluation/_common/rai_service.py +54 -62
- azure/ai/evaluation/_common/utils.py +182 -12
- azure/ai/evaluation/_constants.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
- azure/ai/evaluation/_evaluate/_evaluate.py +116 -62
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
- azure/ai/evaluation/_evaluate/_utils.py +44 -25
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +59 -30
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +10 -13
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +18 -20
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -4
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +4 -4
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -4
- azure/ai/evaluation/_evaluators/_eci/_eci.py +4 -4
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +3 -2
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +4 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +3 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +11 -8
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +21 -7
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -5
- azure/ai/evaluation/_exceptions.py +9 -6
- azure/ai/evaluation/_http_utils.py +203 -132
- azure/ai/evaluation/_model_configurations.py +5 -5
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
- azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -5
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
- azure/ai/evaluation/simulator/_simulator.py +112 -113
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/METADATA +72 -44
- azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt +50 -0
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/RECORD +64 -56
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/__init__.py
CHANGED
azure/ai/evaluation/_common/constants.py
CHANGED
@@ -3,6 +3,8 @@
 # ---------------------------------------------------------
 from enum import Enum

+from azure.core import CaseInsensitiveEnumMeta
+

 class CommonConstants:
     """Define common constants."""
@@ -43,7 +45,7 @@ class _InternalAnnotationTasks:
     ECI = "eci"


-class EvaluationMetrics:
+class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Evaluation metrics to aid the RAI service in determining what
     metrics to request, and how to present them back to the user."""
@@ -56,7 +58,7 @@ class EvaluationMetrics:
     XPIA = "xpia"


-class _InternalEvaluationMetrics:
+class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Evaluation metrics that are not publicly supported.
     These metrics are experimental and subject to potential change or migration to the main
     enum over time.
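Because the metric enums are now str-valued and use azure-core's CaseInsensitiveEnumMeta, members compare equal to their string values and can be looked up by name without worrying about casing. A quick hedged sketch (XPIA is the only member visible in the hunk above; the others are unchanged):

    from azure.ai.evaluation._common.constants import EvaluationMetrics

    assert EvaluationMetrics.XPIA == "xpia"                     # str mixin: usable wherever a plain string is expected
    assert EvaluationMetrics["xpia"] is EvaluationMetrics.XPIA  # CaseInsensitiveEnumMeta: name lookup ignores case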
azure/ai/evaluation/_common/math.py
ADDED
@@ -0,0 +1,18 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import math
+from typing import List
+
+
+def list_sum(lst: List[float]) -> float:
+    return sum(lst)
+
+
+def list_mean(lst: List[float]) -> float:
+    return list_sum(lst) / len(lst)
+
+
+def list_mean_nan_safe(lst: List[float]) -> float:
+    return list_mean([l for l in lst if not math.isnan(l)])
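A quick illustration (not part of the diff) of why list_mean_nan_safe exists: rows whose score could not be parsed now contribute math.nan, and the nan-safe mean simply skips them instead of skewing the aggregate:

    import math

    from azure.ai.evaluation._common.math import list_mean_nan_safe

    per_row_scores = [4.0, math.nan, 6.0]      # one row failed to produce a score
    print(list_mean_nan_safe(per_row_scores))  # 5.0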
azure/ai/evaluation/_common/rai_service.py
CHANGED
@@ -3,20 +3,20 @@
 # ---------------------------------------------------------
 import asyncio
 import importlib.metadata
+import math
 import re
 import time
 from ast import literal_eval
-from typing import Dict, List
+from typing import Dict, List, Optional, Union, cast
 from urllib.parse import urlparse

 import jwt
-import numpy as np

 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-from azure.ai.evaluation._http_utils import get_async_http_client
+from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
 from azure.core.credentials import TokenCredential
-from azure.
+from azure.core.pipeline.policies import AsyncRetryPolicy

 from .constants import (
     CommonConstants,
@@ -53,7 +53,13 @@ def get_common_headers(token: str) -> Dict:
     }


-
+def get_async_http_client_with_timeout() -> AsyncHttpPipeline:
+    return get_async_http_client().with_policies(
+        retry_policy=AsyncRetryPolicy(timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT)
+    )
+
+
+async def ensure_service_availability(rai_svc_url: str, token: str, capability: Optional[str] = None) -> None:
     """Check if the Responsible AI service is available in the region and has the required capability, if relevant.

     :param rai_svc_url: The Responsible AI service URL.
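The new helper centralizes the request timeout in a retry policy instead of threading timeout= through every call site. A minimal caller-side sketch (the probe function is ours, not package code, mirroring the pattern the later hunks adopt):

    from azure.ai.evaluation._common.rai_service import get_async_http_client_with_timeout

    async def probe(url: str, headers: dict) -> int:
        # The pipeline already carries the retry policy with the default timeout,
        # so individual get/post calls no longer pass timeout= explicitly.
        async with get_async_http_client_with_timeout() as client:
            response = await client.get(url, headers=headers)
            return response.status_code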
@@ -68,9 +74,7 @@ async def ensure_service_availability(rai_svc_url: str, token: str, capability:
     svc_liveness_url = rai_svc_url + "/checkannotation"

     async with get_async_http_client() as client:
-        response = await client.get(
-            svc_liveness_url, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
-        )
+        response = await client.get(svc_liveness_url, headers=headers)

     if response.status_code != 200:
         msg = f"RAI service is not available in this region. Status Code: {response.status_code}"
@@ -154,16 +158,14 @@ async def submit_request(query: str, response: str, metric: str, rai_svc_url: st
     url = rai_svc_url + "/submitannotation"
     headers = get_common_headers(token)

-    async with
-
-            url, json=payload, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
-        )
+    async with get_async_http_client_with_timeout() as client:
+        http_response = await client.post(url, json=payload, headers=headers)

-    if
-        print("Fail evaluating '%s' with error message: %s" % (payload["UserTextList"],
-
+    if http_response.status_code != 202:
+        print("Fail evaluating '%s' with error message: %s" % (payload["UserTextList"], http_response.text()))
+        http_response.raise_for_status()

-    result =
+    result = http_response.json()
     operation_id = result["location"].split("/")[-1]
     return operation_id

@@ -190,10 +192,8 @@ async def fetch_result(operation_id: str, rai_svc_url: str, credential: TokenCre
     token = await fetch_or_reuse_token(credential, token)
     headers = get_common_headers(token)

-    async with
-        response = await client.get(
-            url, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
-        )
+    async with get_async_http_client_with_timeout() as client:
+        response = await client.get(url, headers=headers)

     if response.status_code == 200:
         return response.json()
@@ -209,7 +209,7 @@

 def parse_response(  # pylint: disable=too-many-branches,too-many-statements
     batch_response: List[Dict], metric_name: str
-) -> Dict:
+) -> Dict[str, Union[str, float]]:
     """Parse the annotation response from Responsible AI service for a content harm evaluation.

     :param batch_response: The annotation response from Responsible AI service.
@@ -217,7 +217,7 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
     :param metric_name: The evaluation metric to use.
     :type metric_name: str
     :return: The parsed annotation result.
-    :rtype:
+    :rtype: Dict[str, Union[str, float]]
     """
     # non-numeric metrics
     if metric_name in {EvaluationMetrics.PROTECTED_MATERIAL, _InternalEvaluationMetrics.ECI, EvaluationMetrics.XPIA}:
@@ -229,27 +229,27 @@ def parse_response(  # pylint: disable=too-many-branches,too-many-statements
         parsed_response = literal_eval(response)
         result = {}
         # Use label instead of score since these are assumed to be boolean results.
-        # Use
-        result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else
+        # Use math.nan as null value since it's ignored by aggregations rather than treated as 0.
+        result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else math.nan
         result[metric_name + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""

         if metric_name == EvaluationMetrics.XPIA:
             # Add "manipulated_content", "intrusion" and "information_gathering" to the result
-            # if present else set them to
+            # if present else set them to math.nan
             result[metric_name + "_manipulated_content"] = (
-                parsed_response["manipulated_content"] if "manipulated_content" in parsed_response else
+                parsed_response["manipulated_content"] if "manipulated_content" in parsed_response else math.nan
             )
             result[metric_name + "_intrusion"] = (
-                parsed_response["intrusion"] if "intrusion" in parsed_response else
+                parsed_response["intrusion"] if "intrusion" in parsed_response else math.nan
             )
             result[metric_name + "_information_gathering"] = (
-                parsed_response["information_gathering"] if "information_gathering" in parsed_response else
+                parsed_response["information_gathering"] if "information_gathering" in parsed_response else math.nan
             )
         return result
     return _parse_content_harm_response(batch_response, metric_name)


-def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -> Dict:
+def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -> Dict[str, Union[str, float]]:
     """Parse the annotation response from Responsible AI service for a content harm evaluation.

     :param batch_response: The annotation response from Responsible AI service.
@@ -257,7 +257,7 @@ def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -
     :param metric_name: The evaluation metric to use.
     :type metric_name: str
     :return: The parsed annotation result.
-    :rtype:
+    :rtype: Dict[str, Union[str, float]]
     """
     # Fix the metric name if it's "hate_fairness"
     # Eventually we will remove this fix once the RAI service is updated
@@ -265,7 +265,7 @@ def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -
     if key == EvaluationMetrics.HATE_FAIRNESS:
         key = EvaluationMetrics.HATE_UNFAIRNESS

-    result = {key:
+    result: Dict[str, Union[str, float]] = {key: math.nan, key + "_score": math.nan, key + "_reason": ""}

     response = batch_response[0]
     if metric_name not in response:
@@ -285,9 +285,9 @@ def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -
     if "label" in harm_response:
         metric_value = harm_response["label"]
     elif "valid" in harm_response:
-        metric_value = 0 if harm_response["valid"] else
+        metric_value = 0 if harm_response["valid"] else math.nan
     else:
-        metric_value =
+        metric_value = math.nan

     # get reason
     if "reasoning" in harm_response:
@@ -301,21 +301,21 @@ def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -
         if metric_value_match:
             metric_value = int(metric_value_match[0])
         else:
-            metric_value =
+            metric_value = math.nan
         reason = harm_response
     elif harm_response != "" and isinstance(harm_response, (int, float)):
         if 0 < harm_response <= 7:
             metric_value = harm_response
         else:
-            metric_value =
+            metric_value = math.nan
         reason = ""
     else:
-        metric_value =
+        metric_value = math.nan
         reason = ""

     harm_score = metric_value
-    if not
-        # int(
+    if not math.isnan(metric_value):
+        # int(math.nan) causes a value error, and math.nan is already handled
         # by get_harm_severity_level
         harm_score = int(metric_value)
     result[key] = get_harm_severity_level(harm_score)
@@ -337,14 +337,13 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
     """
     headers = get_common_headers(token)

-    async with
-        response = await client.get(
+    async with get_async_http_client_with_timeout() as client:
+        response = await client.get(
             f"https://management.azure.com/subscriptions/{azure_ai_project['subscription_id']}/"
             f"resourceGroups/{azure_ai_project['resource_group_name']}/"
             f"providers/Microsoft.MachineLearningServices/workspaces/{azure_ai_project['project_name']}?"
             f"api-version=2023-08-01-preview",
             headers=headers,
-            timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT,
         )

     if response.status_code != 200:
@@ -361,7 +360,7 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
     return f"{base_url.scheme}://{base_url.netloc}"


-async def get_rai_svc_url(project_scope:
+async def get_rai_svc_url(project_scope: AzureAIProject, token: str) -> str:
     """Get the Responsible AI service URL

     :param project_scope: The Azure AI project scope details.
@@ -385,7 +384,7 @@ async def get_rai_svc_url(project_scope: dict, token: str) -> str:
     return rai_url


-async def fetch_or_reuse_token(credential: TokenCredential, token: str = None) -> str:
+async def fetch_or_reuse_token(credential: TokenCredential, token: Optional[str] = None) -> str:
     """Get token. Fetch a new token if the current token is near expiry

     :param credential: The Azure authentication credential.
@@ -395,29 +394,26 @@ async def fetch_or_reuse_token(credential: TokenCredential, token: str = None) -
     :type token: str
     :return: The Azure authentication token.
     """
-
-
-
-    # Decode the token to get its expiration time
+    if token:
+        # Decode the token to get its expiration time
+        try:
             decoded_token = jwt.decode(token, options={"verify_signature": False})
+        except jwt.PyJWTError:
+            pass
+        else:
             exp_time = decoded_token["exp"]
             current_time = time.time()

-            #
+            # Return current token if not near expiry
             if (exp_time - current_time) >= 300:
-
-    except Exception:  # pylint: disable=broad-exception-caught
-        pass
-
-    if acquire_new_token:
-        token = credential.get_token("https://management.azure.com/.default").token
+                return token

-    return token
+    return credential.get_token("https://management.azure.com/.default").token


 async def evaluate_with_rai_service(
     query: str, response: str, metric_name: str, project_scope: AzureAIProject, credential: TokenCredential
-):
+) -> Dict[str, Union[str, float]]:
     """ "Evaluate the content safety of the response using Responsible AI service

     :param query: The query to evaluate.
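For reference, the token-reuse logic that fetch_or_reuse_token now expresses with try/except/else boils down to the check below. This is a standalone sketch; the helper name is ours, not the package's:

    import time

    import jwt

    def _token_still_fresh(token: str) -> bool:
        # Decode without verifying the signature, only to read the "exp" claim.
        try:
            decoded = jwt.decode(token, options={"verify_signature": False})
        except jwt.PyJWTError:
            return False
        # Reuse the token only if at least 5 minutes (300 s) remain before expiry.
        return (decoded["exp"] - time.time()) >= 300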
@@ -432,12 +428,8 @@ async def evaluate_with_rai_service(
     :type credential:
         ~azure.core.credentials.TokenCredential
     :return: The parsed annotation result.
-    :rtype:
+    :rtype: Dict[str, Union[str, float]]
     """
-    # Use DefaultAzureCredential if no credential is provided
-    # This is for the for batch run scenario as the credential cannot be serialized by promoptflow
-    if credential is None or credential == {}:
-        credential = DefaultAzureCredential()

     # Get RAI service URL from discovery service and check service availability
     token = await fetch_or_reuse_token(credential)
@@ -446,7 +438,7 @@ async def evaluate_with_rai_service(

     # Submit annotation request and fetch result
     operation_id = await submit_request(query, response, metric_name, rai_svc_url, token)
-    annotation_response = await fetch_result(operation_id, rai_svc_url, credential, token)
+    annotation_response = cast(List[Dict], await fetch_result(operation_id, rai_svc_url, credential, token))
     result = parse_response(annotation_response, metric_name)

     return result
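Since the DefaultAzureCredential fallback was removed from evaluate_with_rai_service, callers now supply a credential explicitly. A hedged usage sketch (project values are placeholders, and the query/response pair is purely illustrative):

    import asyncio

    from azure.identity import DefaultAzureCredential

    from azure.ai.evaluation._common.constants import EvaluationMetrics
    from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service

    project = {
        "subscription_id": "<subscription-id>",     # placeholder values
        "resource_group_name": "<resource-group>",
        "project_name": "<ai-project-name>",
    }

    result = asyncio.run(
        evaluate_with_rai_service(
            query="What is the capital of France?",
            response="Paris",
            metric_name=EvaluationMetrics.HATE_UNFAIRNESS,
            project_scope=project,
            credential=DefaultAzureCredential(),
        )
    )
    print(result)  # dict of "<metric>", "<metric>_score", "<metric>_reason" style keys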
azure/ai/evaluation/_common/utils.py
CHANGED
@@ -2,21 +2,29 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

+import math
 import threading
-from typing import List, Union
+from typing import Any, List, Literal, Mapping, Type, TypeVar, Union, cast, get_args, get_origin

 import nltk
-import
+from typing_extensions import NotRequired, Required, TypeGuard

-from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+from azure.ai.evaluation._model_configurations import (
+    AzureAIProject,
+    AzureOpenAIModelConfiguration,
+    OpenAIModelConfiguration,
+)

 from . import constants

 _nltk_data_download_lock = threading.Lock()

+T_TypedDict = TypeVar("T_TypedDict", bound=Mapping[Any, Any])

-
+
+def get_harm_severity_level(harm_score: int) -> Union[str, float]:
     """Generate harm severity level based on harm score.

     :param harm_score: The harm score to be evaluated.
@@ -30,12 +38,12 @@ def get_harm_severity_level(harm_score: int) -> str:
         constants.HarmSeverityLevel.Medium: [4, 5],
         constants.HarmSeverityLevel.High: [6, 7],
     }
-    if harm_score
-        return
+    if math.isnan(harm_score) or harm_score is None:
+        return math.nan
     for harm_level, harm_score_range in HARM_SEVERITY_LEVEL_MAPPING.items():
         if harm_score_range[0] <= harm_score <= harm_score_range[1]:
             return harm_level.value
-    return
+    return math.nan


 def ensure_nltk_data_downloaded():
@@ -71,12 +79,20 @@ def nltk_tokenize(text: str) -> List[str]:
     return list(tokens)


+def _is_aoi_model_config(val: object) -> TypeGuard[AzureOpenAIModelConfiguration]:
+    return isinstance(val, dict) and all(isinstance(val.get(k), str) for k in ("azure_endpoint", "azure_deployment"))
+
+
+def _is_openai_model_config(val: object) -> TypeGuard[OpenAIModelConfiguration]:
+    return isinstance(val, dict) and all(isinstance(val.get(k), str) for k in ("model"))
+
+
 def parse_model_config_type(
     model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
 ) -> None:
-    if
+    if _is_aoi_model_config(model_config):
         model_config["type"] = AZURE_OPENAI_TYPE
-
+    elif _is_openai_model_config(model_config):
         model_config["type"] = OPENAI_TYPE


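A short sketch of why these internal helpers return TypeGuard[...] rather than bool: inside the guarded branch, a type checker narrows the union to the matching TypedDict (the endpoint_of function below is illustrative, not package code):

    from typing import Union

    from azure.ai.evaluation._common.utils import _is_aoi_model_config
    from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration

    def endpoint_of(model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]) -> str:
        if _is_aoi_model_config(model_config):
            # Narrowed to AzureOpenAIModelConfiguration here, so the key access type-checks.
            return model_config["azure_endpoint"]
        return ""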
@@ -87,16 +103,170 @@ def construct_prompty_model_config(
 ) -> dict:
     parse_model_config_type(model_config)

-    if model_config
+    if _is_aoi_model_config(model_config):
         model_config["api_version"] = model_config.get("api_version", default_api_version)

-    prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
+    prompty_model_config: dict = {"configuration": model_config, "parameters": {"extra_headers": {}}}

     # Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
     # https://github.com/encode/httpx/discussions/2959
     prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})

-    if model_config
+    if _is_aoi_model_config(model_config) and user_agent:
         prompty_model_config["parameters"]["extra_headers"].update({"x-ms-useragent": user_agent})

     return prompty_model_config
+
+
+def validate_azure_ai_project(o: object) -> AzureAIProject:
+    fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}
+
+    if not isinstance(o, dict):
+        msg = "azure_ai_project must be a dictionary"
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+    missing_fields = set(fields.keys()) - o.keys()
+
+    if missing_fields:
+        msg = "azure_ai_project must contain keys: " + ", ".join(f'"{field}"' for field in missing_fields)
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+    for field_name, expected_type in fields.items():
+        if isinstance(o[field_name], expected_type):
+            continue
+
+        msg = f"Expected azure_ai_project field {field_name!r} to be of type {expected_type}."
+
+        raise EvaluationException(
+            message=f"{msg}. Got {type(o[field_name])}.",
+            internal_message=msg,
+            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
+            category=ErrorCategory.MISSING_FIELD,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+    return cast(AzureAIProject, o)
+
+
+def validate_model_config(config: dict) -> Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]:
+    try:
+        return _validate_typed_dict(config, AzureOpenAIModelConfiguration)
+    except TypeError:
+        try:
+            return _validate_typed_dict(config, OpenAIModelConfiguration)
+        except TypeError as e:
+            msg = "Model config validation failed."
+            raise EvaluationException(
+                message=msg, internal_message=msg, category=ErrorCategory.MISSING_FIELD, blame=ErrorBlame.USER_ERROR
+            ) from e
+
+
+def _validate_typed_dict(o: object, t: Type[T_TypedDict]) -> T_TypedDict:
+    """Do very basic runtime validation that an object is a typed dict
+
+    .. warning::
+
+        This validation is very basic, robust enough to cover some very simple TypedDicts.
+        Ideally, validation of this kind should be delegated to something more robust.
+
+        You will very quickly run into limitations trying to apply this function more broadly:
+        * Doesn't support stringized annotations at all
+        * Very limited support for generics, and "special form" (NoReturn, NotRequired, Required, etc...) types.
+        * Error messages are poor, especially if there is any nesting.
+
+    :param object o: The object to check
+    :param Type[T_TypedDict] t: The TypedDict to validate against
+    :raises NotImplementedError: Several forms of validation are unsupported
+        * Checking against stringized annotations
+        * Checking a generic that is not one of a few basic forms
+    :raises TypeError: If a value does not match the specified annotation
+    :raises ValueError: If t's annotation is not a string, type of a special form (e.g. NotRequired, Required, etc...)
+    :returns: The object passed in
+    :rtype: T_TypedDict
+    """
+    if not isinstance(o, dict):
+        raise TypeError(f"Expected type 'dict', got type '{type(object)}'.")
+
+    annotations = t.__annotations__
+    is_total = getattr(t, "__total__", False)
+    unknown_keys = set(o.keys()) - annotations.keys()
+
+    if unknown_keys:
+        raise TypeError(f"dict contains unknown keys: {list(unknown_keys)!r}")
+
+    required_keys = {
+        k
+        for k in annotations
+        if (is_total and get_origin(annotations[k]) is not NotRequired)
+        or (not is_total and get_origin(annotations[k]) is Required)
+    }
+
+    missing_keys = required_keys - o.keys()
+
+    if missing_keys:
+        raise TypeError(f"Missing required keys: {list(missing_keys)!r}.")
+
+    def validate_annotation(v: object, annotation: Union[str, type, object]) -> bool:
+        if isinstance(annotation, str):
+            raise NotImplementedError("Missing support for validating against stringized annotations.")
+
+        if (origin := get_origin(annotation)) is not None:
+            if origin is tuple:
+                validate_annotation(v, tuple)
+                tuple_args = get_args(annotation)
+                if len(cast(tuple, v)) != len(tuple_args):
+                    raise TypeError(f"Expected a {len(tuple_args)}-tuple, got a {len(cast(tuple, v))}-tuple.")
+                for tuple_val, tuple_args in zip(cast(tuple, v), tuple_args):
+                    validate_annotation(tuple_val, tuple_args)
+            elif origin is dict:
+                validate_annotation(v, dict)
+                dict_key_ann, dict_val_ann = get_args(annotation)
+                for dict_key, dict_val in cast(dict, v).items():
+                    validate_annotation(dict_val, dict_val_ann)
+                    validate_annotation(dict_key, dict_key_ann)
+            elif origin is list:
+                validate_annotation(v, list)
+                list_val_ann = get_args(annotation)[0]
+                for list_val in cast(list, v):
+                    validate_annotation(list_val, list_val_ann)
+            elif origin is Union:
+                for generic_arg in get_args(annotation):
+                    try:
+                        validate_annotation(v, generic_arg)
+                        return True
+                    except TypeError:
+                        pass
+                raise TypeError(f"Expected value to have type {annotation}. Received type {type(v)}")
+            elif origin is Literal:
+                literal_args = get_args(annotation)
+                if not any(type(literal) is type(v) and literal == v for literal in literal_args):
+                    raise TypeError(f"Expected value to be one of {list(literal_args)!r}. Received type {type(v)}")
+            elif any(origin is g for g in (NotRequired, Required)):
+                validate_annotation(v, get_args(annotation)[0])
+            else:
+                raise NotImplementedError(f"Validation not implemented for generic {origin}.")
+            return True
+
+        if isinstance(annotation, type):
+            if not isinstance(v, annotation):
+                raise TypeError(f"Expected value to have type {annotation}. Received type {type(v)}.")
+            return True
+
+        raise ValueError("Annotation to validate against should be a str, type, or generic.")
+
+    for k, v in o.items():
+        validate_annotation(v, annotations[k])
+
+    return cast(T_TypedDict, o)
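A small, self-contained sketch of how these validators behave. MyConfig is a throwaway TypedDict for illustration only; the project values are placeholders:

    from typing_extensions import TypedDict

    from azure.ai.evaluation._common.utils import _validate_typed_dict, validate_azure_ai_project

    class MyConfig(TypedDict):
        endpoint: str
        retries: int

    _validate_typed_dict({"endpoint": "https://example.test", "retries": 3}, MyConfig)  # returns the dict unchanged
    # _validate_typed_dict({"endpoint": 1, "retries": 3}, MyConfig)                     # would raise TypeError

    project = validate_azure_ai_project(
        {"subscription_id": "<sub>", "resource_group_name": "<rg>", "project_name": "<proj>"}
    )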
azure/ai/evaluation/_constants.py
CHANGED
@@ -1,6 +1,7 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+from typing import Literal


 class EvaluationMetrics:
@@ -48,6 +49,13 @@ class DefaultOpenEncoding:
     """SDK Default Encoding when writing a file"""


+class EvaluationRunProperties:
+    """Defines properties used to identify an evaluation run by UI"""
+
+    RUN_TYPE = "runType"
+    EVALUATION_RUN = "_azureml.evaluation_run"
+
+
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

 CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
@@ -58,6 +66,6 @@ PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC"
 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT = "OTEL_EXPORTER_OTLP_TRACES_TIMEOUT"
 OTEL_EXPORTER_OTLP_TRACES_TIMEOUT_DEFAULT = 60

-AZURE_OPENAI_TYPE = "azure_openai"
+AZURE_OPENAI_TYPE: Literal["azure_openai"] = "azure_openai"

-OPENAI_TYPE = "openai"
+OPENAI_TYPE: Literal["openai"] = "openai"
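A brief sketch of what the Literal annotations buy: type checkers now see these constants as precise literal types rather than plain str, so assigning them into a Literal-typed field type-checks cleanly (ConfigType below is an illustrative alias, not package code):

    from typing import Literal

    from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE

    ConfigType = Literal["azure_openai", "openai"]

    kind: ConfigType = AZURE_OPENAI_TYPE  # accepted because AZURE_OPENAI_TYPE is Literal["azure_openai"]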
azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py
CHANGED
@@ -2,6 +2,8 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import os
+import types
+from typing import Optional, Type, Union

 from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from promptflow._utils.user_agent_utils import ClientUserAgentUtil
@@ -30,12 +32,12 @@ class BatchRunContext:
     ]
     """

-    def __init__(self, client) -> None:
+    def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False

-    def __enter__(self):
+    def __enter__(self) -> None:
         if isinstance(self.client, CodeClient):
             ClientUserAgentUtil.append_user_agent(USER_AGENT)
             inject_openai_api()
@@ -56,7 +58,12 @@ class BatchRunContext:
         # For addressing the issue of asyncio event loop closed on Windows
         set_event_loop_policy()

-    def __exit__(
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
         if isinstance(self.client, CodeClient):
             recover_openai_api()

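This last change is purely about typing the context-manager protocol. A self-contained sketch (not package code) of the same fully annotated __enter__/__exit__ shape:

    import types
    from typing import Optional, Type

    class Managed:
        """Minimal context manager with the same annotations BatchRunContext now uses."""

        def __enter__(self) -> None:
            print("setup")

        def __exit__(
            self,
            exc_type: Optional[Type[BaseException]],
            exc_value: Optional[BaseException],
            exc_tb: Optional[types.TracebackType],
        ) -> None:
            print("teardown")

    with Managed():
        pass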