azure-ai-evaluation 1.0.0b5__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +188 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +118 -0
- azure/ai/evaluation/_common/_experimental.py +4 -0
- azure/ai/evaluation/_common/math.py +62 -2
- azure/ai/evaluation/_common/rai_service.py +110 -50
- azure/ai/evaluation/_common/utils.py +50 -16
- azure/ai/evaluation/_constants.py +2 -0
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +13 -3
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +12 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -43
- azure/ai/evaluation/_evaluate/_evaluate.py +62 -131
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +2 -1
- azure/ai/evaluation/_evaluate/_utils.py +72 -38
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +16 -17
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +60 -29
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +88 -6
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +16 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +39 -10
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +58 -52
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +79 -34
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +73 -34
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +74 -33
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -34
- azure/ai/evaluation/_evaluators/_eci/_eci.py +28 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +57 -26
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +13 -15
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +68 -30
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +17 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +10 -8
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -2
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +10 -6
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +6 -2
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +57 -34
- azure/ai/evaluation/_evaluators/_qa/_qa.py +25 -37
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +63 -29
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +76 -161
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +24 -25
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +65 -67
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +26 -20
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +74 -40
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_http_utils.py +6 -4
- azure/ai/evaluation/_model_configurations.py +65 -14
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +17 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +57 -47
- azure/ai/evaluation/simulator/_constants.py +11 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +128 -7
- azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +16 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +12 -1
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +3 -1
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +48 -4
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -0
- azure/ai/evaluation/simulator/_simulator.py +54 -45
- azure/ai/evaluation/simulator/_utils.py +25 -7
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/METADATA +240 -327
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/RECORD +71 -68
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -9,12 +9,12 @@ import time
|
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
|
|
11
11
|
|
|
12
|
+
import re
|
|
12
13
|
import jinja2
|
|
13
14
|
|
|
14
15
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
16
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
16
|
-
|
|
17
|
-
from .._model_tools import LLMBase, OpenAIChatCompletionsModel
|
|
17
|
+
from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
|
|
18
18
|
from .._model_tools._template_handler import TemplateParameters
|
|
19
19
|
from .constants import ConversationRole
|
|
20
20
|
|
|
@@ -128,15 +128,19 @@ class ConversationBot:
|
|
|
128
128
|
self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
|
|
129
129
|
if role == ConversationRole.USER:
|
|
130
130
|
if "conversation_starter" in self.persona_template_args:
|
|
131
|
+
print(self.persona_template_args)
|
|
131
132
|
conversation_starter_content = self.persona_template_args["conversation_starter"]
|
|
132
133
|
if isinstance(conversation_starter_content, dict):
|
|
133
134
|
self.conversation_starter = conversation_starter_content
|
|
135
|
+
print(f"Conversation starter content: {conversation_starter_content}")
|
|
134
136
|
else:
|
|
135
137
|
try:
|
|
136
138
|
self.conversation_starter = jinja2.Template(
|
|
137
139
|
conversation_starter_content, undefined=jinja2.StrictUndefined
|
|
138
140
|
)
|
|
139
|
-
|
|
141
|
+
print("Successfully created a Jinja2 template for the conversation starter.")
|
|
142
|
+
except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
|
|
143
|
+
print(f"Template syntax error: {e}. Using raw content.")
|
|
140
144
|
self.conversation_starter = conversation_starter_content
|
|
141
145
|
else:
|
|
142
146
|
self.logger.info(
|
|
@@ -175,6 +179,9 @@ class ConversationBot:
|
|
|
175
179
|
samples = [self.conversation_starter.render(**self.persona_template_args)]
|
|
176
180
|
else:
|
|
177
181
|
samples = [self.conversation_starter]
|
|
182
|
+
jailbreak_string = self.persona_template_args.get("jailbreak_string", None)
|
|
183
|
+
if jailbreak_string:
|
|
184
|
+
samples = [f"{jailbreak_string} {samples[0]}"]
|
|
178
185
|
time_taken = 0
|
|
179
186
|
|
|
180
187
|
finish_reason = ["stop"]
|
|
@@ -271,8 +278,6 @@ class CallbackConversationBot(ConversationBot):
|
|
|
271
278
|
"id": None,
|
|
272
279
|
"template_parameters": {},
|
|
273
280
|
}
|
|
274
|
-
self.logger.info("Using user provided callback returning response.")
|
|
275
|
-
|
|
276
281
|
time_taken = end_time - start_time
|
|
277
282
|
try:
|
|
278
283
|
response = {
|
|
@@ -290,8 +295,6 @@ class CallbackConversationBot(ConversationBot):
|
|
|
290
295
|
blame=ErrorBlame.USER_ERROR,
|
|
291
296
|
) from exc
|
|
292
297
|
|
|
293
|
-
self.logger.info("Parsed callback response")
|
|
294
|
-
|
|
295
298
|
return response, {}, time_taken, result
|
|
296
299
|
|
|
297
300
|
# Bug 3354264: template is unused in the method - is this intentional?
|
|
@@ -308,9 +311,127 @@ class CallbackConversationBot(ConversationBot):
|
|
|
308
311
|
}
|
|
309
312
|
|
|
310
313
|
|
|
314
|
+
class MultiModalConversationBot(ConversationBot):
|
|
315
|
+
"""MultiModal Conversation bot that uses a user provided callback to generate responses.
|
|
316
|
+
|
|
317
|
+
:param callback: The callback function to use to generate responses.
|
|
318
|
+
:type callback: Callable
|
|
319
|
+
:param user_template: The template to use for the request.
|
|
320
|
+
:type user_template: str
|
|
321
|
+
:param user_template_parameters: The template parameters to use for the request.
|
|
322
|
+
:type user_template_parameters: Dict
|
|
323
|
+
:param args: Optional arguments to pass to the parent class.
|
|
324
|
+
:type args: Any
|
|
325
|
+
:param kwargs: Optional keyword arguments to pass to the parent class.
|
|
326
|
+
:type kwargs: Any
|
|
327
|
+
"""
|
|
328
|
+
|
|
329
|
+
def __init__(
|
|
330
|
+
self,
|
|
331
|
+
callback: Callable,
|
|
332
|
+
user_template: str,
|
|
333
|
+
user_template_parameters: TemplateParameters,
|
|
334
|
+
rai_client: RAIClient,
|
|
335
|
+
*args,
|
|
336
|
+
**kwargs,
|
|
337
|
+
) -> None:
|
|
338
|
+
self.callback = callback
|
|
339
|
+
self.user_template = user_template
|
|
340
|
+
self.user_template_parameters = user_template_parameters
|
|
341
|
+
self.rai_client = rai_client
|
|
342
|
+
|
|
343
|
+
super().__init__(*args, **kwargs)
|
|
344
|
+
|
|
345
|
+
async def generate_response(
|
|
346
|
+
self,
|
|
347
|
+
session: AsyncHttpPipeline,
|
|
348
|
+
conversation_history: List[Any],
|
|
349
|
+
max_history: int,
|
|
350
|
+
turn_number: int = 0,
|
|
351
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
352
|
+
previous_prompt = conversation_history[-1]
|
|
353
|
+
chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
|
|
354
|
+
|
|
355
|
+
# replace prompt with {image.jpg} tags with image content data.
|
|
356
|
+
conversation_history.pop()
|
|
357
|
+
conversation_history.append(
|
|
358
|
+
ConversationTurn(
|
|
359
|
+
role=previous_prompt.role,
|
|
360
|
+
name=previous_prompt.name,
|
|
361
|
+
message=chat_protocol_message["messages"][0]["content"],
|
|
362
|
+
full_response=previous_prompt.full_response,
|
|
363
|
+
request=chat_protocol_message,
|
|
364
|
+
)
|
|
365
|
+
)
|
|
366
|
+
msg_copy = copy.deepcopy(chat_protocol_message)
|
|
367
|
+
result = {}
|
|
368
|
+
start_time = time.time()
|
|
369
|
+
result = await self.callback(msg_copy)
|
|
370
|
+
end_time = time.time()
|
|
371
|
+
if not result:
|
|
372
|
+
result = {
|
|
373
|
+
"messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
|
|
374
|
+
"finish_reason": ["stop"],
|
|
375
|
+
"id": None,
|
|
376
|
+
"template_parameters": {},
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
time_taken = end_time - start_time
|
|
380
|
+
try:
|
|
381
|
+
response = {
|
|
382
|
+
"samples": [result["messages"][-1]["content"]],
|
|
383
|
+
"finish_reason": ["stop"],
|
|
384
|
+
"id": None,
|
|
385
|
+
}
|
|
386
|
+
except Exception as exc:
|
|
387
|
+
msg = "User provided callback does not conform to chat protocol standard."
|
|
388
|
+
raise EvaluationException(
|
|
389
|
+
message=msg,
|
|
390
|
+
internal_message=msg,
|
|
391
|
+
target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
|
|
392
|
+
category=ErrorCategory.INVALID_VALUE,
|
|
393
|
+
blame=ErrorBlame.USER_ERROR,
|
|
394
|
+
) from exc
|
|
395
|
+
|
|
396
|
+
return response, chat_protocol_message, time_taken, result
|
|
397
|
+
|
|
398
|
+
async def _to_chat_protocol(self, conversation_history, template_parameters): # pylint: disable=unused-argument
|
|
399
|
+
messages = []
|
|
400
|
+
|
|
401
|
+
for _, m in enumerate(conversation_history):
|
|
402
|
+
if "image:" in m.message:
|
|
403
|
+
content = await self._to_multi_modal_content(m.message)
|
|
404
|
+
messages.append({"content": content, "role": m.role.value})
|
|
405
|
+
else:
|
|
406
|
+
messages.append({"content": m.message, "role": m.role.value})
|
|
407
|
+
|
|
408
|
+
return {
|
|
409
|
+
"template_parameters": template_parameters,
|
|
410
|
+
"messages": messages,
|
|
411
|
+
"$schema": "http://azureml/sdk-2-0/ChatConversation.json",
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
async def _to_multi_modal_content(self, text: str) -> list:
|
|
415
|
+
split_text = re.findall(r"[^{}]+|\{[^{}]*\}", text)
|
|
416
|
+
messages = [
|
|
417
|
+
text.strip("{}").replace("image:", "").strip() if text.startswith("{") else text for text in split_text
|
|
418
|
+
]
|
|
419
|
+
contents = []
|
|
420
|
+
for msg in messages:
|
|
421
|
+
if msg.startswith("image_understanding/"):
|
|
422
|
+
encoded_image = await self.rai_client.get_image_data(msg)
|
|
423
|
+
contents.append(
|
|
424
|
+
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
|
|
425
|
+
)
|
|
426
|
+
else:
|
|
427
|
+
contents.append({"type": "text", "text": msg})
|
|
428
|
+
return contents
|
|
429
|
+
|
|
430
|
+
|
|
311
431
|
__all__ = [
|
|
312
432
|
"ConversationRole",
|
|
313
433
|
"ConversationBot",
|
|
314
434
|
"CallbackConversationBot",
|
|
435
|
+
"MultiModalConversationBot",
|
|
315
436
|
"ConversationTurn",
|
|
316
437
|
]
|
|
@@ -9,7 +9,6 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
|
9
9
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
10
10
|
from azure.ai.evaluation.simulator._constants import SupportedLanguages
|
|
11
11
|
from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
12
|
-
|
|
13
12
|
from ..._http_utils import AsyncHttpPipeline
|
|
14
13
|
from . import ConversationBot, ConversationTurn
|
|
15
14
|
|
|
@@ -11,6 +11,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
11
11
|
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
12
12
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
13
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
14
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
14
15
|
from azure.core.credentials import TokenCredential
|
|
15
16
|
|
|
16
17
|
from ._adversarial_simulator import AdversarialSimulator
|
|
@@ -30,9 +31,18 @@ class DirectAttackSimulator:
|
|
|
30
31
|
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
31
32
|
:param credential: The credential for connecting to Azure AI project.
|
|
32
33
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
34
|
+
|
|
35
|
+
.. admonition:: Example:
|
|
36
|
+
|
|
37
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
38
|
+
:start-after: [START direct_attack_simulator]
|
|
39
|
+
:end-before: [END direct_attack_simulator]
|
|
40
|
+
:language: python
|
|
41
|
+
:dedent: 8
|
|
42
|
+
:caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
|
|
33
43
|
"""
|
|
34
44
|
|
|
35
|
-
def __init__(self, *, azure_ai_project:
|
|
45
|
+
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
36
46
|
"""Constructor."""
|
|
37
47
|
|
|
38
48
|
try:
|
|
@@ -125,7 +135,7 @@ class DirectAttackSimulator:
|
|
|
125
135
|
- '**$schema**': A string indicating the schema URL for the conversation format.
|
|
126
136
|
|
|
127
137
|
The 'content' for 'assistant' role messages may include the messages that your callback returned.
|
|
128
|
-
:rtype: Dict[str, [List[Dict[str, Any]]]]
|
|
138
|
+
:rtype: Dict[str, [List[Dict[str, Any]]]]
|
|
129
139
|
|
|
130
140
|
**Output format**
|
|
131
141
|
|
|
@@ -178,9 +188,7 @@ class DirectAttackSimulator:
|
|
|
178
188
|
if not randomization_seed:
|
|
179
189
|
randomization_seed = randint(0, 1000000)
|
|
180
190
|
|
|
181
|
-
regular_sim = AdversarialSimulator(
|
|
182
|
-
azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential
|
|
183
|
-
)
|
|
191
|
+
regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
|
|
184
192
|
regular_sim_results = await regular_sim(
|
|
185
193
|
scenario=scenario,
|
|
186
194
|
target=target,
|
|
@@ -190,10 +198,10 @@ class DirectAttackSimulator:
|
|
|
190
198
|
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
191
199
|
api_call_delay_sec=api_call_delay_sec,
|
|
192
200
|
concurrent_async_task=concurrent_async_task,
|
|
193
|
-
randomize_order=
|
|
201
|
+
randomize_order=False,
|
|
194
202
|
randomization_seed=randomization_seed,
|
|
195
203
|
)
|
|
196
|
-
jb_sim = AdversarialSimulator(azure_ai_project=
|
|
204
|
+
jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
|
|
197
205
|
jb_sim_results = await jb_sim(
|
|
198
206
|
scenario=scenario,
|
|
199
207
|
target=target,
|
|
@@ -204,7 +212,7 @@ class DirectAttackSimulator:
|
|
|
204
212
|
api_call_delay_sec=api_call_delay_sec,
|
|
205
213
|
concurrent_async_task=concurrent_async_task,
|
|
206
214
|
_jailbreak_type="upia",
|
|
207
|
-
randomize_order=
|
|
215
|
+
randomize_order=False,
|
|
208
216
|
randomization_seed=randomization_seed,
|
|
209
217
|
)
|
|
210
218
|
return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
|
|
@@ -13,6 +13,7 @@ from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
|
13
13
|
from azure.ai.evaluation._common._experimental import experimental
|
|
14
14
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
15
|
from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
|
|
16
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
16
17
|
from azure.core.credentials import TokenCredential
|
|
17
18
|
|
|
18
19
|
from ._adversarial_simulator import AdversarialSimulator, JsonLineList
|
|
@@ -32,9 +33,18 @@ class IndirectAttackSimulator(AdversarialSimulator):
|
|
|
32
33
|
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
33
34
|
:param credential: The credential for connecting to Azure AI project.
|
|
34
35
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
36
|
+
|
|
37
|
+
.. admonition:: Example:
|
|
38
|
+
|
|
39
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
40
|
+
:start-after: [START indirect_attack_simulator]
|
|
41
|
+
:end-before: [END indirect_attack_simulator]
|
|
42
|
+
:language: python
|
|
43
|
+
:dedent: 8
|
|
44
|
+
:caption: Run the IndirectAttackSimulator to produce 1 result with 1 conversation turn (2 messages in the result).
|
|
35
45
|
"""
|
|
36
46
|
|
|
37
|
-
def __init__(self, *, azure_ai_project:
|
|
47
|
+
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
38
48
|
"""Constructor."""
|
|
39
49
|
|
|
40
50
|
try:
|
|
@@ -179,6 +189,7 @@ class IndirectAttackSimulator(AdversarialSimulator):
|
|
|
179
189
|
api_call_delay_sec=api_call_delay_sec,
|
|
180
190
|
language=language,
|
|
181
191
|
semaphore=semaphore,
|
|
192
|
+
scenario=scenario,
|
|
182
193
|
)
|
|
183
194
|
)
|
|
184
195
|
)
|
|
@@ -14,7 +14,9 @@ from typing import Optional, Union
|
|
|
14
14
|
from azure.core.credentials import AccessToken, TokenCredential
|
|
15
15
|
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
|
|
16
16
|
|
|
17
|
-
AZURE_TOKEN_REFRESH_INTERVAL =
|
|
17
|
+
AZURE_TOKEN_REFRESH_INTERVAL = int(
|
|
18
|
+
os.getenv("AZURE_TOKEN_REFRESH_INTERVAL", "600")
|
|
19
|
+
) # token refresh interval in seconds
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
class TokenScope(Enum):
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import os
|
|
5
5
|
from typing import Any
|
|
6
6
|
from urllib.parse import urljoin, urlparse
|
|
7
|
+
import base64
|
|
7
8
|
|
|
8
9
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
9
10
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
|
|
@@ -57,6 +58,7 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
57
58
|
# add a "/" at the end of the url
|
|
58
59
|
self.api_url = self.api_url.rstrip("/") + "/"
|
|
59
60
|
self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
|
|
61
|
+
self.parameter_image_endpoint = urljoin(self.api_url, "simulation/template/parameters/image")
|
|
60
62
|
self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
|
|
61
63
|
self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
|
|
62
64
|
self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")
|
|
@@ -74,14 +76,18 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
74
76
|
timeout=5,
|
|
75
77
|
)
|
|
76
78
|
if response.status_code != 200:
|
|
77
|
-
msg =
|
|
79
|
+
msg = (
|
|
80
|
+
f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
|
|
81
|
+
f"correctly, and make sure you have the necessary access permissions. "
|
|
82
|
+
f"Status code: {response.status_code}."
|
|
83
|
+
)
|
|
78
84
|
raise EvaluationException(
|
|
79
85
|
message=msg,
|
|
80
|
-
internal_message=msg,
|
|
81
86
|
target=ErrorTarget.RAI_CLIENT,
|
|
82
|
-
category=ErrorCategory.
|
|
83
|
-
blame=ErrorBlame.
|
|
87
|
+
category=ErrorCategory.PROJECT_ACCESS_ERROR,
|
|
88
|
+
blame=ErrorBlame.USER_ERROR,
|
|
84
89
|
)
|
|
90
|
+
|
|
85
91
|
base_url = urlparse(response.json()["properties"]["discoveryUrl"])
|
|
86
92
|
return f"{base_url.scheme}://{base_url.netloc}"
|
|
87
93
|
|
|
@@ -162,3 +168,41 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
162
168
|
category=ErrorCategory.UNKNOWN,
|
|
163
169
|
blame=ErrorBlame.USER_ERROR,
|
|
164
170
|
)
|
|
171
|
+
|
|
172
|
+
async def get_image_data(self, path: str) -> Any:
|
|
173
|
+
"""Make a GET Image request to the given url
|
|
174
|
+
|
|
175
|
+
:param path: The url of the image
|
|
176
|
+
:type path: str
|
|
177
|
+
:raises EvaluationException: If the Azure safety evaluation service is not available in the current region
|
|
178
|
+
:return: The response
|
|
179
|
+
:rtype: Any
|
|
180
|
+
"""
|
|
181
|
+
token = self.token_manager.get_token()
|
|
182
|
+
headers = {
|
|
183
|
+
"Authorization": f"Bearer {token}",
|
|
184
|
+
"Content-Type": "application/json",
|
|
185
|
+
"User-Agent": USER_AGENT,
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
session = self._create_async_client()
|
|
189
|
+
params = {"path": path}
|
|
190
|
+
async with session:
|
|
191
|
+
response = await session.get(
|
|
192
|
+
url=self.parameter_image_endpoint, params=params, headers=headers
|
|
193
|
+
) # pylint: disable=unexpected-keyword-arg
|
|
194
|
+
|
|
195
|
+
if response.status_code == 200:
|
|
196
|
+
return base64.b64encode(response.content).decode("utf-8")
|
|
197
|
+
|
|
198
|
+
msg = (
|
|
199
|
+
"Azure safety evaluation service is not available in your current region, "
|
|
200
|
+
+ "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
|
|
201
|
+
)
|
|
202
|
+
raise EvaluationException(
|
|
203
|
+
message=msg,
|
|
204
|
+
internal_message=msg,
|
|
205
|
+
target=ErrorTarget.RAI_CLIENT,
|
|
206
|
+
category=ErrorCategory.UNKNOWN,
|
|
207
|
+
blame=ErrorBlame.USER_ERROR,
|
|
208
|
+
)
|