azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +82 -0
- azure/ai/evaluation/_common/__init__.py +16 -0
- azure/ai/evaluation/_common/_experimental.py +172 -0
- azure/ai/evaluation/_common/constants.py +72 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/rai_service.py +632 -0
- azure/ai/evaluation/_common/utils.py +445 -0
- azure/ai/evaluation/_constants.py +72 -0
- azure/ai/evaluation/_evaluate/__init__.py +3 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
- azure/ai/evaluation/_evaluate/_utils.py +298 -0
- azure/ai/evaluation/_evaluators/__init__.py +3 -0
- azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
- azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
- azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
- azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
- azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
- azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
- azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
- azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
- azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
- azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
- azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
- azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
- azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
- azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
- azure/ai/evaluation/_exceptions.py +128 -0
- azure/ai/evaluation/_http_utils.py +466 -0
- azure/ai/evaluation/_model_configurations.py +123 -0
- azure/ai/evaluation/_user_agent.py +6 -0
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +5 -0
- azure/ai/evaluation/py.typed +0 -0
- azure/ai/evaluation/simulator/__init__.py +16 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
- azure/ai/evaluation/simulator/_constants.py +27 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
- azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
- azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
- azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
- azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
- azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
- azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
- azure/ai/evaluation/simulator/_simulator.py +716 -0
- azure/ai/evaluation/simulator/_tracing.py +89 -0
- azure/ai/evaluation/simulator/_utils.py +132 -0
- azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
- azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
- azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
- {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
- azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
- azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
- azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import inspect
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Optional, Union
|
|
13
|
+
|
|
14
|
+
from azure.core.credentials import AccessToken, TokenCredential
|
|
15
|
+
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
|
|
16
|
+
|
|
17
|
+
# Number of seconds a cached token is reused before a new one is requested.
# Can be overridden through the AZURE_TOKEN_REFRESH_INTERVAL environment variable.
AZURE_TOKEN_REFRESH_INTERVAL = int(os.environ.get("AZURE_TOKEN_REFRESH_INTERVAL", "600"))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TokenScope(Enum):
    """Enumeration of the token scopes that can be requested for Azure endpoints."""

    # Scope string for the Azure management endpoint.
    DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class APITokenManager(ABC):
    """Abstract base for objects that hand out API tokens.

    Concrete subclasses must implement both :meth:`get_token` and :meth:`get_token_async`.

    :param logger: Logger object
    :type logger: logging.Logger
    :param auth_header: Authorization header prefix. Defaults to "Bearer"
    :type auth_header: str
    :param credential: Azure credential object. When omitted, one is created by :meth:`get_aad_credential`.
    :type credential: Optional[TokenCredential]
    """

    def __init__(
        self,
        logger: logging.Logger,
        auth_header: str = "Bearer",
        credential: Optional[TokenCredential] = None,
    ) -> None:
        self.logger = logger
        self.auth_header = auth_header
        # Created on first access through the ``lock`` property.
        self._lock: Optional[asyncio.Lock] = None
        # The logger must be assigned before this point: get_aad_credential() logs its choice.
        self.credential = self.get_aad_credential() if credential is None else credential
        self.token: Optional[str] = None
        self.last_refresh_time: Optional[float] = None

    @property
    def lock(self) -> asyncio.Lock:
        """Return the lock used to manage concurrent access to the token.

        The lock is created the first time this property is read and reused afterwards.

        :return: Lock object
        :rtype: asyncio.Lock
        """
        existing = self._lock
        if existing is not None:
            return existing

        self._lock = asyncio.Lock()
        return self._lock

    def get_aad_credential(self) -> Union[DefaultAzureCredential, ManagedIdentityCredential]:
        """Build the AAD credential object.

        A ManagedIdentityCredential bound to the given client ID is returned when the environment variable
        DEFAULT_IDENTITY_CLIENT_ID is set; a DefaultAzureCredential is returned otherwise.

        :return: The AAD credential object
        :rtype: Union[DefaultAzureCredential, ManagedIdentityCredential]
        """
        client_id = os.environ.get("DEFAULT_IDENTITY_CLIENT_ID", None)
        if client_id is None:
            self.logger.info("Environment variable DEFAULT_IDENTITY_CLIENT_ID is not set, using DefaultAzureCredential")
            return DefaultAzureCredential()

        self.logger.info(f"Using DEFAULT_IDENTITY_CLIENT_ID: {client_id}")
        return ManagedIdentityCredential(client_id=client_id)

    @abstractmethod
    def get_token(self) -> str:
        """Synchronous method to get the API token. Subclasses should implement this method.

        :return: API token
        :rtype: str
        """

    @abstractmethod
    async def get_token_async(self) -> str:
        """Async method to get the API token. Subclasses should implement this method.

        :return: API token
        :rtype: str
        """
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ManagedIdentityAPITokenManager(APITokenManager):
    """API Token Manager for Azure Managed Identity

    The token obtained from the credential is cached on the instance and reused until it is older than
    ``AZURE_TOKEN_REFRESH_INTERVAL`` seconds.

    :param token_scope: Token scope for Azure endpoint
    :type token_scope: ~azure.ai.evaluation.simulator._model_tools.TokenScope
    :param logger: Logger object
    :type logger: logging.Logger
    :keyword auth_header: Authorization header prefix. Defaults to "Bearer"
    :paramtype auth_header: str
    :keyword credential: Azure credential object. When omitted, the base class creates one.
    :paramtype credential: Optional[TokenCredential]
    """

    def __init__(
        self,
        token_scope: TokenScope,
        logger: logging.Logger,
        *,
        auth_header: str = "Bearer",
        credential: Optional[TokenCredential] = None,
    ) -> None:
        super().__init__(logger, auth_header=auth_header, credential=credential)
        self.token_scope = token_scope

    def _needs_refresh(self) -> bool:
        """Tell whether no token is cached yet or the cached one is older than the refresh interval.

        :return: True when a new token must be requested
        :rtype: bool
        """
        return (
            self.token is None
            or self.last_refresh_time is None
            or time.time() - self.last_refresh_time > AZURE_TOKEN_REFRESH_INTERVAL
        )

    def get_token(self) -> str:
        """Get the API token synchronously. If the token is not available or has expired, refresh the token.

        :return: API token
        :rtype: str
        """
        if self._needs_refresh():
            self.token = self.credential.get_token(self.token_scope.value).token
            # Record the refresh time only once the credential call has succeeded. Stamping it first
            # would make a failed refresh look successful and keep serving the previous, stale token
            # for another full interval.
            self.last_refresh_time = time.time()
            self.logger.info("Refreshed Azure endpoint token.")

        return self.token

    async def get_token_async(self) -> str:
        """Get the API token asynchronously. If the token is not available or has expired, refresh it.

        The credential's ``get_token`` result is awaited when it is awaitable and used directly otherwise,
        so both synchronous and asynchronous credential objects are accepted.

        :return: API token
        :rtype: str
        """
        if self._needs_refresh():
            token_response = self.credential.get_token(self.token_scope.value)
            if inspect.isawaitable(token_response):
                token_response = await token_response

            self.token = token_response.token
            # As in get_token(): only a successful refresh updates the timestamp.
            self.last_refresh_time = time.time()
            self.logger.info("Refreshed Azure endpoint token.")

        return self.token
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class PlainTokenManager(APITokenManager):
    """Plain API Token Manager

    Hands out the fixed key it was constructed with; the key is never refreshed.

    :param openapi_key: OpenAPI key
    :type openapi_key: str
    :param logger: Logger object
    :type logger: logging.Logger
    :keyword auth_header: Authorization header prefix. Defaults to "Bearer"
    :paramtype auth_header: str
    :keyword credential: Azure credential object, forwarded to the base class
    :paramtype credential: Optional[TokenCredential]
    """

    def __init__(
        self,
        openapi_key: str,
        logger: logging.Logger,
        *,
        auth_header: str = "Bearer",
        credential: Optional[TokenCredential] = None,
    ) -> None:
        super().__init__(logger, auth_header=auth_header, credential=credential)
        # Assigned after the base initializer, which resets ``token`` to None.
        self.token: str = openapi_key

    def get_token(self) -> str:
        """Get the API token

        :return: API token
        :rtype: str
        """
        return self.token

    async def get_token_async(self) -> str:
        """Get the API token asynchronously.

        The base class declares this method abstract; without this implementation the class could not
        be instantiated at all.

        :return: API token
        :rtype: str
        """
        return self.token
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
import asyncio
|
|
5
|
+
import copy
|
|
6
|
+
import json
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from typing import Any, Dict, List, Optional, cast
|
|
10
|
+
|
|
11
|
+
from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
|
|
12
|
+
from azure.ai.evaluation._user_agent import USER_AGENT
|
|
13
|
+
from azure.core.exceptions import HttpResponseError
|
|
14
|
+
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
15
|
+
|
|
16
|
+
from .._model_tools._template_handler import TemplateParameters
|
|
17
|
+
from .models import OpenAIChatCompletionsModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SimulationRequestDTO:
    """Simulation Request Data Transfer Object

    :param url: The URL to send the request to.
    :type url: str
    :param headers: The headers to send with the request.
    :type headers: Dict[str, str]
    :param payload: The payload to send with the request. It is stored JSON-encoded in the ``json`` attribute.
    :type payload: Dict[str, Any]
    :param params: The parameters to send with the request.
    :type params: Dict[str, str]
    :param templatekey: The template key to use for the request.
    :type templatekey: str
    :param template_parameters: The template parameters to use for the request, if any.
        Stored in the ``templateParameters`` attribute.
    :type template_parameters: Optional[TemplateParameters]
    """

    def __init__(
        self,
        url: str,
        headers: Dict[str, str],
        payload: Dict[str, Any],
        params: Dict[str, str],
        templatekey: str,
        template_parameters: Optional[TemplateParameters],
    ):
        self.url = url
        self.headers = headers
        # The payload is kept as a JSON string, not as the original dictionary.
        self.json = json.dumps(payload)
        self.params = params
        self.templatekey = templatekey
        self.templateParameters = template_parameters

    def to_dict(self) -> Dict:
        """Convert the DTO to a dictionary.

        The result is a shallow copy of the instance attributes in which every key and value of
        ``templateParameters`` (when present) has been converted to a string.

        :return: The DTO as a dictionary.
        :rtype: Dict
        """
        as_dict = dict(vars(self))

        raw_parameters = as_dict["templateParameters"]
        if raw_parameters is None:
            return as_dict

        as_dict["templateParameters"] = {str(name): str(value) for name, value in raw_parameters.items()}
        return as_dict

    def to_json(self):
        """Convert the DTO to a JSON string.

        The instance attributes are serialized as they are; unlike :meth:`to_dict`, template parameters
        are not converted to strings first.

        :return: The DTO as a JSON string.
        :rtype: str
        """
        return json.dumps(vars(self))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
    """A chat completion model that uses a proxy to query the model with a body of data.

    :param name: The name of the model.
    :type name: str
    :param template_key: The template key to use for the request.
    :type template_key: str
    :param template_parameters: The template parameters to use for the request.
    :type template_parameters: TemplateParameters
    :keyword kwargs: Additional keyword arguments to pass to the parent class.
    """

    def __init__(self, name: str, template_key: str, template_parameters: TemplateParameters, **kwargs) -> None:
        self.tkey = template_key
        self.tparam = template_parameters
        # Location of the most recently submitted request's result; set by request_api().
        self.result_url: Optional[str] = None

        super().__init__(name=name, **kwargs)

    def format_request_data(self, messages: List[Dict], **request_params) -> Dict:  # type: ignore[override]
        """Format the request data to query the model with.

        :param messages: List of messages to query the model with.
            Expected format: [{"role": "user", "content": "Hello!"}, ...]
        :type messages: List[Dict]
        :keyword request_params: Additional parameters to pass to the model. They take precedence over
            the values returned by ``get_model_params()``.
        :paramtype request_params: Dict
        :return: The formatted request data.
        :rtype: Dict
        """
        request_data = {"messages": messages, **self.get_model_params()}
        request_data.update(request_params)
        return request_data

    async def get_conversation_completion(
        self,
        messages: List[Dict],
        session: AsyncHttpPipeline,
        role: str = "assistant",  # pylint: disable=unused-argument
        **request_params,
    ) -> dict:
        """
        Query the model a single time with a message.

        :param messages: List of messages to query the model with.
            Expected format: [{"role": "user", "content": "Hello!"}, ...]
        :type messages: List[Dict]
        :param session: AsyncHttpPipeline object to query the model with.
        :type session: ~azure.ai.evaluation._http_utils.AsyncHttpPipeline
        :param role: The role of the user sending the message. This parameter is not used in this method;
            however, it must be included to match the method signature of the parent class. Defaults to "assistant".
        :type role: str
        :keyword request_params: Additional parameters to pass to the model.
        :paramtype request_params: Dict
        :return: A dictionary representing the completion of the conversation query.
        :rtype: Dict
        """
        request_data = self.format_request_data(
            messages=messages,
            **request_params,
        )
        return await self.request_api(
            session=session,
            request_data=request_data,
        )

    async def request_api(
        self,
        session: AsyncHttpPipeline,
        request_data: dict,
    ) -> dict:
        """
        Request the model with a body of data.

        The request is submitted to the proxy endpoint, which must answer with HTTP 202 and a
        ``location`` field. That location is then polled (retrying while it keeps answering 202)
        until the result is available.

        :param session: HTTPS Session for invoking the endpoint.
        :type session: AsyncHttpPipeline
        :param request_data: Prompt dictionary to query the model with. (Pass {"prompt": prompt} instead of prompt.)
        :type request_data: Dict[str, Any]
        :return: A dictionary with the keys ``request``, ``response`` (parsed), ``time_taken`` (seconds)
            and ``full_response`` (copy of the raw response body).
        :rtype: Dict[str, Any]
        :raises ~azure.core.exceptions.HttpResponseError: If the submission does not return HTTP 202, or if
            fetching the result fails.
        """

        self._log_request(request_data)

        # Use the async accessor here as well, consistent with the polling step below.
        token = await self.token_manager.get_token_async()

        proxy_headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "User-Agent": USER_AGENT,
        }

        headers = {
            "Content-Type": "application/json",
            "X-CV": f"{uuid.uuid4()}",
            "X-ModelType": self.model or "",
        }
        # add all additional headers
        headers.update(self.additional_headers)  # type: ignore[arg-type]
        params = {}
        if self.api_version:
            params["api-version"] = self.api_version

        sim_request_dto = SimulationRequestDTO(
            url=self.endpoint_url,
            headers=headers,
            payload=request_data,
            params=params,
            templatekey=self.tkey,
            template_parameters=self.tparam,
        )

        time_start = time.time()

        response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())

        if response.status_code != 202:
            raise HttpResponseError(
                message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
            )

        response_data = response.json()
        # Keep the location in a local variable for polling. Reading it back from ``self.result_url``
        # after the awaits below would let concurrent calls on the same instance overwrite each
        # other's location and poll the wrong operation. The attribute is still updated for callers
        # that inspect it.
        result_url = cast(str, response_data["location"])
        self.result_url = result_url

        retry_policy = AsyncRetryPolicy(  # set up retry configuration
            retry_on_status_codes=[202],  # on which statuses to retry
            retry_total=7,
            retry_backoff_factor=10.0,
            retry_backoff_max=180,
            retry_mode=RetryMode.Exponential,
        )

        # Initial wait before attempting to fetch the result. The total delay (30 seconds) matches the
        # previous 15-second awaited sleep plus 15-second ``time.sleep``; it is fully awaited now so
        # the event loop is not blocked for other tasks while this request waits.
        await asyncio.sleep(30)

        async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
            token = await self.token_manager.get_token_async()
            proxy_headers = {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
                "User-Agent": USER_AGENT,
            }
            response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
                result_url, headers=proxy_headers
            )

        response.raise_for_status()

        response_data = response.json()
        self.logger.info("Response: %s", response_data)

        # Copy the full response and return it to be saved in jsonl.
        full_response = copy.copy(response_data)

        time_taken = time.time() - time_start

        # pylint: disable=unexpected-keyword-arg
        parsed_response = self._parse_response(response_data, request_data=request_data)  # type: ignore[call-arg]

        return {
            "request": request_data,
            "response": parsed_response,
            "time_taken": time_taken,
            "full_response": full_response,
        }
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.parse import urljoin, urlparse
|
|
7
|
+
|
|
8
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
9
|
+
from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
|
|
10
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
11
|
+
from azure.ai.evaluation._user_agent import USER_AGENT
|
|
12
|
+
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
13
|
+
|
|
14
|
+
from ._identity_manager import APITokenManager
|
|
15
|
+
|
|
16
|
+
# Optional override of the service endpoint, read once at import time from the RAI_SVC_URL
# environment variable. Trailing slashes are removed; the value stays None when the variable is unset.
api_url = os.environ.get("RAI_SVC_URL")
if api_url is not None:
    api_url = api_url.rstrip("/")
    print(f"Found RAI_SVC_URL in environment variable, using {api_url} for the service endpoint.")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RAIClient:  # pylint: disable=client-accepts-api-version-keyword
    """Client for the Responsible AI Service

    :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
        name.
    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
    :param token_manager: The token manager
    :type token_manager: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
    """

    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
        self, azure_ai_project: AzureAIProject, token_manager: APITokenManager
    ) -> None:
        self.azure_ai_project = azure_ai_project
        self.token_manager = token_manager

        self.contentharm_parameters = None
        self.jailbreaks_dataset = None
        # Type ('xpia' or 'upia') of the dataset held in ``jailbreaks_dataset``. None means the type is
        # unknown, e.g. nothing was fetched yet or the attribute was assigned from outside.
        self._jailbreaks_dataset_type = None

        # The module-level override (taken from RAI_SVC_URL) wins over service discovery.
        if api_url is not None:
            host = api_url

        else:
            host = self._get_service_discovery_url()
        segments = [
            host.rstrip("/"),
            "raisvc/v1.0/subscriptions",
            self.azure_ai_project["subscription_id"],
            "resourceGroups",
            self.azure_ai_project["resource_group_name"],
            "providers/Microsoft.MachineLearningServices/workspaces",
            self.azure_ai_project["project_name"],
        ]
        self.api_url = "/".join(segments)
        # add a "/" at the end of the url, so that urljoin appends to it instead of replacing the last segment
        self.api_url = self.api_url.rstrip("/") + "/"
        self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
        self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
        self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
        self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")

    def _get_service_discovery_url(self):
        """Look up the service host for the configured project.

        Queries the Azure management endpoint for the workspace and returns the scheme and network
        location of the ``discoveryUrl`` found in its properties.

        :return: The base URL in the form ``scheme://netloc``
        :rtype: str
        :raises EvaluationException: If the management endpoint does not answer with HTTP 200
        """
        bearer_token = self.token_manager.get_token()
        headers = {"Authorization": f"Bearer {bearer_token}", "Content-Type": "application/json"}
        http_client = get_http_client()
        response = http_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
            f"https://management.azure.com/subscriptions/{self.azure_ai_project['subscription_id']}/"
            f"resourceGroups/{self.azure_ai_project['resource_group_name']}/"
            f"providers/Microsoft.MachineLearningServices/workspaces/{self.azure_ai_project['project_name']}?"
            f"api-version=2023-08-01-preview",
            headers=headers,
            timeout=5,
        )
        if response.status_code != 200:
            msg = (
                f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
                f"correctly, and make sure you have the necessary access permissions. "
                f"Status code: {response.status_code}."
            )
            raise EvaluationException(
                message=msg,
                target=ErrorTarget.RAI_CLIENT,
                category=ErrorCategory.PROJECT_ACCESS_ERROR,
                blame=ErrorBlame.USER_ERROR,
            )

        base_url = urlparse(response.json()["properties"]["discoveryUrl"])
        return f"{base_url.scheme}://{base_url.netloc}"

    def _create_async_client(self) -> AsyncHttpPipeline:
        """Create an async http client with retry mechanism

        The retry policy is configured with 6 total retries, a backoff factor of 5 and the fixed retry mode.

        :return: The async http client
        :rtype: ~azure.ai.evaluation._http_utils.AsyncHttpPipeline
        """
        return get_async_http_client().with_policies(
            retry_policy=AsyncRetryPolicy(retry_total=6, retry_backoff_factor=5, retry_mode=RetryMode.Fixed)
        )

    async def get_contentharm_parameters(self) -> Any:
        """Get the content harm parameters.

        The parameters are fetched on the first call and cached on the instance afterwards.

        :return: The content harm parameters
        :rtype: Any
        """
        if self.contentharm_parameters is None:
            self.contentharm_parameters = await self.get(self.parameter_json_endpoint)

        return self.contentharm_parameters

    async def get_jailbreaks_dataset(self, type: str) -> Any:
        """Get the jailbreaks dataset of the requested type.

        The last fetched dataset is cached together with its type, so a cached 'xpia' dataset is never
        returned for a 'upia' request (or vice versa); asking for the other type fetches it again.

        :param type: The dataset type. Should be one of 'xpia' or 'upia'
        :type type: str
        :return: The jailbreaks dataset
        :rtype: Any
        :raises EvaluationException: If the type is not supported
        """
        cached_type = self._jailbreaks_dataset_type
        # A cached dataset of unknown type (cached_type is None) is returned as-is.
        if self.jailbreaks_dataset is not None and (cached_type is None or cached_type == type):
            return self.jailbreaks_dataset

        if type == "xpia":
            endpoint = self.xpia_jailbreaks_json_endpoint
        elif type == "upia":
            endpoint = self.jailbreaks_json_endpoint
        else:
            msg = f"Invalid jailbreak type: {type}. Supported types: ['xpia', 'upia']"
            raise EvaluationException(
                message=msg,
                internal_message=msg,
                target=ErrorTarget.ADVERSARIAL_SIMULATOR,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            )

        self.jailbreaks_dataset = await self.get(endpoint)
        self._jailbreaks_dataset_type = type

        return self.jailbreaks_dataset

    async def get(self, url: str) -> Any:
        """Make a GET request to the given url

        :param url: The url
        :type url: str
        :raises EvaluationException: If the response status is not 200. The message reports that the Azure
            safety evaluation service is not available in the current region.
        :return: The decoded JSON body of the response
        :rtype: Any
        """
        token = self.token_manager.get_token()
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "User-Agent": USER_AGENT,
        }

        session = self._create_async_client()

        async with session:
            response = await session.get(url=url, headers=headers)  # pylint: disable=unexpected-keyword-arg

        if response.status_code == 200:
            return response.json()

        msg = (
            "Azure safety evaluation service is not available in your current region, "
            + "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
        )
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            target=ErrorTarget.RAI_CLIENT,
            category=ErrorCategory.UNKNOWN,
            blame=ErrorBlame.USER_ERROR,
        )
|