PyPI - azure-ai-evaluation - Versions diffs - 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (150) hide show

azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py CHANGED Viewed

@@ -6,12 +6,15 @@ import copy
 import json
 import time
 import uuid
-from typing import Any, Dict, List, Optional, cast
+from typing import Any, Dict, List, Optional, cast, Union
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
 from azure.ai.evaluation._user_agent import USER_AGENT
 from azure.core.exceptions import HttpResponseError
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
+from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from azure.ai.evaluation._common.onedp.models import SimulationDTO
+from azure.ai.evaluation._common.constants import RAIService
 from .._model_tools._template_handler import TemplateParameters
 from .models import OpenAIChatCompletionsModel
@@ -40,14 +43,14 @@ class SimulationRequestDTO:
         headers: Dict[str, str],
         payload: Dict[str, Any],
         params: Dict[str, str],
-        templatekey: str,
+        template_key: str,
         template_parameters: Optional[TemplateParameters],
     ):
         self.url = url
         self.headers = headers
         self.json = json.dumps(payload)
         self.params = params
-        self.templatekey = templatekey
+        self.template_key = template_key
         self.templateParameters = template_parameters
     def to_dict(self) -> Dict:
@@ -111,7 +114,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
     async def get_conversation_completion(
         self,
         messages: List[Dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str = "assistant",  # pylint: disable=unused-argument
         **request_params,
     ) -> dict:
@@ -142,7 +145,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
     async def request_api(
         self,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         request_data: dict,
     ) -> dict:
         """
@@ -183,51 +186,72 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
             headers=headers,
             payload=request_data,
             params=params,
-            templatekey=self.tkey,
+            template_key=self.tkey,
             template_parameters=self.tparam,
         )
         time_start = time.time()
         full_response = None
-        response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
-        if response.status_code != 202:
-            raise HttpResponseError(
-                message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
+        if(isinstance(session, AIProjectClient)):
+            sim_request_dto = SimulationDTO(
+                headers=headers,
+                params=params,
+                json=json.dumps(request_data),
+                template_key=self.tkey,
+                template_parameters=self.tparam,
             )
-        response_data = response.json()
-        self.result_url = cast(str, response_data["location"])
-        retry_policy = AsyncRetryPolicy(  # set up retry configuration
-            retry_on_status_codes=[202],  # on which statuses to retry
-            retry_total=7,
-            retry_backoff_factor=10.0,
-            retry_backoff_max=180,
-            retry_mode=RetryMode.Exponential,
-        )
-        # initial 15 seconds wait before attempting to fetch result
-        # Need to wait both in this thread and in the async thread for some reason?
-        # Someone not under a crunch and with better async understandings should dig into this more.
-        await asyncio.sleep(15)
-        time.sleep(15)
-        async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
-            token = await self.token_manager.get_token_async()
-            proxy_headers = {
-                "Authorization": f"Bearer {token}",
-                "Content-Type": "application/json",
-                "User-Agent": USER_AGENT,
-            }
-            response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-                self.result_url, headers=proxy_headers
+            response_data = session.red_teams.submit_simulation(sim_request_dto, headers=headers, params=params)
+            operation_id = response_data["location"].split("/")[-1]
+            request_count = 0
+            flag = True
+            while flag:
+                response = session.evaluations.operation_results(operation_id, headers=headers)
+                if response.status_code == 200:
+                    response_data = cast(List[Dict], response.json())
+                    flag = False
+                else:
+                    request_count += 1
+                    sleep_time = RAIService.SLEEP_TIME**request_count
+                    await asyncio.sleep(sleep_time)
+        else:
+            response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
+            # response.raise_for_status()
+            if response.status_code != 202:
+                raise HttpResponseError(
+                    message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
+                )
+            response_data = response.json()
+            self.result_url = cast(str, response_data["location"])
+            retry_policy = AsyncRetryPolicy(  # set up retry configuration
+                retry_on_status_codes=[202],  # on which statuses to retry
+                retry_total=7,
+                retry_backoff_factor=10.0,
+                retry_backoff_max=180,
+                retry_mode=RetryMode.Exponential,
             )
-        response.raise_for_status()
-        response_data = response.json()
+            # initial 15 seconds wait before attempting to fetch result
+            # Need to wait both in this thread and in the async thread for some reason?
+            # Someone not under a crunch and with better async understandings should dig into this more.
+            await asyncio.sleep(15)
+            time.sleep(15)
+            async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
+                token = await self.token_manager.get_token_async()
+                proxy_headers = {
+                    "Authorization": f"Bearer {token}",
+                    "Content-Type": "application/json",
+                    "User-Agent": USER_AGENT,
+                }
+                response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+                    self.result_url, headers=proxy_headers
+                )
+            response.raise_for_status()
+            response_data = response.json()
         self.logger.info("Response: %s", response_data)
         # Copy the full response and return it to be saved in jsonl.

azure/ai/evaluation/simulator/_model_tools/_template_handler.py CHANGED Viewed

@@ -2,11 +2,12 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Dict, List, Optional, TypedDict, cast
+from typing import Dict, List, Optional, TypedDict, cast, Union
+from ast import literal_eval
 from typing_extensions import NotRequired
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._common.onedp._client import AIProjectClient
 from ._rai_client import RAIClient
@@ -153,7 +154,7 @@ class AdversarialTemplateHandler:
     :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
     """
-    def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
+    def __init__(self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]) -> None:
         self.azure_ai_project = azure_ai_project
         self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None
         self.rai_client = rai_client
@@ -163,8 +164,11 @@ class AdversarialTemplateHandler:
             categorized_parameters: Dict[str, _CategorizedParameter] = {}
             util = ContentHarmTemplatesUtils
-            parameters = await self.rai_client.get_contentharm_parameters()
+            if isinstance(self.rai_client, RAIClient):
+                parameters = await self.rai_client.get_contentharm_parameters()
+            elif isinstance(self.rai_client, AIProjectClient):
+                parameters = literal_eval(self.rai_client.red_teams.get_template_parameters())
             for k in parameters.keys():
                 template_key = util.get_template_key(k)
                 categorized_parameters[template_key] = {

azure/ai/evaluation/simulator/_model_tools/models.py CHANGED Viewed

@@ -12,6 +12,8 @@ from abc import ABC, abstractmethod
 from collections import deque
 from typing import Deque, Dict, List, Optional, Union
 from urllib.parse import urlparse
+from azure.ai.evaluation._common.onedp._client import AIProjectClient
+from ._rai_client import RAIClient
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
@@ -78,7 +80,7 @@ class LLMBase(ABC):
     async def get_completion(
         self,
         prompt: str,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         **request_params,
     ) -> dict:
         """
@@ -100,7 +102,7 @@ class LLMBase(ABC):
     async def get_all_completions(
         self,
         prompts: List[str],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_max_parallel_count: int,
         api_call_delay_seconds: float,
         request_error_rate_threshold: float,
@@ -120,7 +122,7 @@ class LLMBase(ABC):
     async def get_conversation_completion(
         self,
         messages: List[dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str,
         **request_params,
     ) -> dict:
@@ -274,7 +276,7 @@ class OpenAICompletionsModel(LLMBase):
     async def get_conversation_completion(
         self,
         messages: List[dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str = "assistant",
         **request_params,
     ) -> dict:
@@ -304,7 +306,7 @@ class OpenAICompletionsModel(LLMBase):
     async def get_all_completions(  # type: ignore[override]
         self,
         prompts: List[Dict[str, str]],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_max_parallel_count: int = 1,
         api_call_delay_seconds: float = 0.1,
         request_error_rate_threshold: float = 0.5,
@@ -372,7 +374,7 @@ class OpenAICompletionsModel(LLMBase):
         self,
         request_datas: List[dict],
         output_collector: List,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_delay_seconds: float = 0.1,
         request_error_rate_threshold: float = 0.5,
     ) -> None:
@@ -433,7 +435,7 @@ class OpenAICompletionsModel(LLMBase):
     async def request_api(
         self,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         request_data: dict,
     ) -> dict:
         """
@@ -476,11 +478,12 @@ class OpenAICompletionsModel(LLMBase):
         time_start = time.time()
         full_response = None
-        response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
-        response.raise_for_status()
-        response_data = response.json()
+        if(isinstance(session, AIProjectClient)):
+            response_data = session.red_teams.submit_simulation(request_data, headers, params)
+        else:
+            response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
+            response.raise_for_status()
+            response_data = response.json()
         self.logger.info(f"Response: {response_data}")
@@ -533,7 +536,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
     async def get_conversation_completion(
         self,
         messages: List[dict],
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         role: str = "assistant",
         **request_params,
     ) -> dict:
@@ -544,7 +547,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
         ----------
         messages: List of messages to query the model with.
         Expected format: [{"role": "user", "content": "Hello!"}, ...]
-        session: AsyncHttpPipeline object to query the model with.
+        session: Union[AsyncHttpPipeline, AIProjectClient] object to query the model with.
         role: Not used for this model, since it is a chat model.
         request_params: Additional parameters to pass to the model.
         """
@@ -560,7 +563,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
     async def get_completion(
         self,
         prompt: str,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         **request_params,
     ) -> dict:
         """
@@ -569,7 +572,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
         Parameters
         ----------
         prompt: Prompt str to query model with.
-        session: AsyncHttpPipeline object to use for the request.
+        session: Union[AsyncHttpPipeline, AIProjectClient] object to use for the request.
         **request_params: Additional parameters to pass to the request.
         """
         messages = [{"role": "system", "content": prompt}]
@@ -583,7 +586,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
     async def get_all_completions(
         self,
         prompts: List[str],  # type: ignore[override]
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         api_call_max_parallel_count: int = 1,
         api_call_delay_seconds: float = 0.1,
         request_error_rate_threshold: float = 0.5,

azure/ai/evaluation/simulator/_simulator.py CHANGED Viewed

@@ -11,7 +11,7 @@ import re
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Union, Tuple
-from promptflow.core import AsyncPrompty
+from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
 from tqdm import tqdm
 from azure.ai.evaluation._common._experimental import experimental

{azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.4.0
+Version: 1.6.0
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -28,8 +28,14 @@ Requires-Dist: azure-identity>=1.16.0
 Requires-Dist: azure-core>=1.30.2
 Requires-Dist: nltk>=3.9.1
 Requires-Dist: azure-storage-blob>=12.10.0
+Requires-Dist: httpx>=0.25.1
+Requires-Dist: pandas<3.0.0,>=2.1.2
+Requires-Dist: openai>=1.73.0
+Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
+Requires-Dist: msrest>=0.6.21
+Requires-Dist: Jinja2>=3.1.6
 Provides-Extra: redteam
-Requires-Dist: pyrit>=0.8.0; extra == "redteam"
+Requires-Dist: pyrit==0.8.1; extra == "redteam"
 # Azure AI Evaluation client library for Python
@@ -376,6 +382,34 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 # Release History
+## 1.6.0 (2025-05-07)
+### Features Added
+- New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
+- Added support for Azure Open AI evaluation via 4 new 'grader' classes, which serve as wrappers around Azure Open AI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
+    - AzureOpenAIGrader (general class for experienced users)
+    - AzureOpenAILabelGrader
+    - AzureOpenAIStringCheckGrader
+    - AzureOpenAITextSimilarityGrader
+### Breaking Changes
+- In the experimental RedTeam's scan method, the `data_only` param has been replaced with `skip_evals` and if you do not want data to be uploaded, use the `skip_upload` flag.
+### Bugs Fixed
+- Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
+    ```
+    "query1": "some query", "response": "some response"
+    ```
+    throws error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
+    Now, users may import data containing fields with numeric characters.
+## 1.5.0 (2025-04-04)
+### Features Added
+- New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
 ## 1.4.0 (2025-03-27)
 ### Features Added

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl