azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +9 -0
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
- azure/ai/evaluation/_aoai/label_grader.py +66 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
- azure/ai/evaluation/_azure/_clients.py +4 -4
- azure/ai/evaluation/_azure/_envs.py +208 -0
- azure/ai/evaluation/_azure/_token_manager.py +12 -7
- azure/ai/evaluation/_common/__init__.py +5 -0
- azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +139 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +158 -28
- azure/ai/evaluation/_common/raiclient/_version.py +1 -1
- azure/ai/evaluation/_common/utils.py +79 -1
- azure/ai/evaluation/_constants.py +16 -0
- azure/ai/evaluation/_eval_mapping.py +71 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +325 -74
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
- azure/ai/evaluation/_evaluate/_utils.py +117 -4
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +8 -3
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +7 -2
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
- azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +825 -450
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
- azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA +25 -2
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/RECORD +123 -65
- /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mapping utilities for metrics and risk categories in Red Team Agent.
|
|
3
|
+
"""
|
|
4
|
+
from .._attack_objective_generator import RiskCategory
|
|
5
|
+
from azure.ai.evaluation._constants import EvaluationMetrics
|
|
6
|
+
|
|
7
|
+
# Mapping of risk categories to their corresponding evaluation metrics
|
|
8
|
+
RISK_CATEGORY_METRIC_MAP = {
|
|
9
|
+
RiskCategory.Violence: EvaluationMetrics.VIOLENCE,
|
|
10
|
+
RiskCategory.HateUnfairness: EvaluationMetrics.HATE_UNFAIRNESS,
|
|
11
|
+
RiskCategory.Sexual: EvaluationMetrics.SEXUAL,
|
|
12
|
+
RiskCategory.SelfHarm: EvaluationMetrics.SELF_HARM
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
def get_metric_from_risk_category(risk_category: RiskCategory) -> str:
|
|
16
|
+
"""Get the appropriate evaluation metric for a given risk category.
|
|
17
|
+
|
|
18
|
+
:param risk_category: The risk category to map to an evaluation metric
|
|
19
|
+
:type risk_category: RiskCategory
|
|
20
|
+
:return: The corresponding evaluation metric
|
|
21
|
+
:rtype: str
|
|
22
|
+
"""
|
|
23
|
+
return RISK_CATEGORY_METRIC_MAP.get(risk_category, EvaluationMetrics.HATE_UNFAIRNESS)
|
|
@@ -189,4 +189,4 @@ def get_orchestrators_for_attack_strategies(attack_strategies: List[Union[Attack
|
|
|
189
189
|
# Since we're just returning one orchestrator type for now, simplify the logic
|
|
190
190
|
# This can be expanded later if different orchestrators are needed for different strategies
|
|
191
191
|
return [lambda chat_target, all_prompts, converter, strategy_name, risk_category:
|
|
192
|
-
None] # This will be replaced with the actual orchestrator function in the main class
|
|
192
|
+
None] # This will be replaced with the actual orchestrator function in the main class
|
|
@@ -12,12 +12,14 @@ import uuid
|
|
|
12
12
|
from tqdm import tqdm
|
|
13
13
|
|
|
14
14
|
from azure.ai.evaluation._common._experimental import experimental
|
|
15
|
-
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
15
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
16
|
+
from azure.ai.evaluation._common.onedp._client import AIProjectClient
|
|
16
17
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
17
18
|
from azure.ai.evaluation._http_utils import get_async_http_client
|
|
18
19
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
19
20
|
from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
|
|
20
21
|
from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
|
|
22
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
21
23
|
from azure.core.credentials import TokenCredential
|
|
22
24
|
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
23
25
|
|
|
@@ -35,7 +37,6 @@ from ._model_tools import (
|
|
|
35
37
|
ManagedIdentityAPITokenManager,
|
|
36
38
|
ProxyChatCompletionsModel,
|
|
37
39
|
RAIClient,
|
|
38
|
-
TokenScope,
|
|
39
40
|
)
|
|
40
41
|
from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
|
|
41
42
|
from ._utils import JsonLineList
|
|
@@ -48,9 +49,9 @@ class AdversarialSimulator:
|
|
|
48
49
|
"""
|
|
49
50
|
Initializes the adversarial simulator with a project scope.
|
|
50
51
|
|
|
51
|
-
:param azure_ai_project: The
|
|
52
|
-
name.
|
|
53
|
-
:type azure_ai_project:
|
|
52
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
53
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
54
|
+
:type azure_ai_project: Union[str, AzureAIProject]
|
|
54
55
|
:param credential: The credential for connecting to Azure AI project.
|
|
55
56
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
56
57
|
|
|
@@ -65,26 +66,37 @@ class AdversarialSimulator:
|
|
|
65
66
|
2 conversation turns each (4 messages per result).
|
|
66
67
|
"""
|
|
67
68
|
|
|
68
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
69
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
69
70
|
"""Constructor."""
|
|
70
71
|
|
|
71
|
-
|
|
72
|
-
self.azure_ai_project =
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
72
|
+
if is_onedp_project(azure_ai_project):
|
|
73
|
+
self.azure_ai_project = azure_ai_project
|
|
74
|
+
self.credential = cast(TokenCredential, credential)
|
|
75
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
76
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
77
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
78
|
+
credential=self.credential,
|
|
79
|
+
)
|
|
80
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
81
|
+
else:
|
|
82
|
+
try:
|
|
83
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
84
|
+
except EvaluationException as e:
|
|
85
|
+
raise EvaluationException(
|
|
86
|
+
message=e.message,
|
|
87
|
+
internal_message=e.internal_message,
|
|
88
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
89
|
+
category=e.category,
|
|
90
|
+
blame=e.blame,
|
|
91
|
+
) from e
|
|
92
|
+
self.credential = cast(TokenCredential, credential)
|
|
93
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
94
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
95
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
96
|
+
credential=self.credential,
|
|
97
|
+
)
|
|
98
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
99
|
+
|
|
88
100
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
89
101
|
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
90
102
|
)
|
|
@@ -188,6 +200,12 @@ class AdversarialSimulator:
|
|
|
188
200
|
)
|
|
189
201
|
self._ensure_service_dependencies()
|
|
190
202
|
templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
|
|
203
|
+
if len(templates) == 0:
|
|
204
|
+
raise EvaluationException(
|
|
205
|
+
message="Templates not found. Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
|
|
206
|
+
internal_message="Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
|
|
207
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
208
|
+
)
|
|
191
209
|
simulation_id = str(uuid.uuid4())
|
|
192
210
|
logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
|
|
193
211
|
concurrent_async_task = min(concurrent_async_task, 1000)
|
|
@@ -206,7 +224,10 @@ class AdversarialSimulator:
|
|
|
206
224
|
total_tasks = min(total_tasks, max_simulation_results)
|
|
207
225
|
_jailbreak_type = kwargs.get("_jailbreak_type", None)
|
|
208
226
|
if _jailbreak_type:
|
|
209
|
-
|
|
227
|
+
if isinstance(self.rai_client, RAIClient):
|
|
228
|
+
jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
|
|
229
|
+
elif isinstance(self.rai_client, AIProjectClient):
|
|
230
|
+
jailbreak_dataset = self.rai_client.red_teams.get_jail_break_dataset_with_type(type=_jailbreak_type)
|
|
210
231
|
progress_bar = tqdm(
|
|
211
232
|
total=total_tasks,
|
|
212
233
|
desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
|
|
@@ -336,23 +357,25 @@ class AdversarialSimulator:
|
|
|
336
357
|
target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
|
|
337
358
|
)
|
|
338
359
|
bots = [user_bot, system_bot]
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
360
|
+
|
|
361
|
+
if isinstance(self.rai_client, AIProjectClient):
|
|
362
|
+
session = self.rai_client
|
|
363
|
+
else:
|
|
364
|
+
session = get_async_http_client().with_policies(
|
|
365
|
+
retry_policy=AsyncRetryPolicy(
|
|
366
|
+
retry_total=api_call_retry_limit,
|
|
367
|
+
retry_backoff_factor=api_call_retry_sleep_sec,
|
|
368
|
+
retry_mode=RetryMode.Fixed,
|
|
369
|
+
)
|
|
344
370
|
)
|
|
371
|
+
_, conversation_history = await simulate_conversation(
|
|
372
|
+
bots=bots,
|
|
373
|
+
session=session,
|
|
374
|
+
turn_limit=max_conversation_turns,
|
|
375
|
+
api_call_delay_sec=api_call_delay_sec,
|
|
376
|
+
language=language,
|
|
345
377
|
)
|
|
346
378
|
|
|
347
|
-
async with semaphore, session:
|
|
348
|
-
_, conversation_history = await simulate_conversation(
|
|
349
|
-
bots=bots,
|
|
350
|
-
session=session,
|
|
351
|
-
turn_limit=max_conversation_turns,
|
|
352
|
-
api_call_delay_sec=api_call_delay_sec,
|
|
353
|
-
language=language,
|
|
354
|
-
)
|
|
355
|
-
|
|
356
379
|
return self._to_chat_protocol(
|
|
357
380
|
conversation_history=conversation_history,
|
|
358
381
|
template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
|
|
@@ -361,11 +384,12 @@ class AdversarialSimulator:
|
|
|
361
384
|
def _get_user_proxy_completion_model(
|
|
362
385
|
self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
|
|
363
386
|
) -> ProxyChatCompletionsModel:
|
|
387
|
+
endpoint_url = self.rai_client._config.endpoint + "/redTeams/simulation/chat/completions/submit" if isinstance(self.rai_client, AIProjectClient) else self.rai_client.simulation_submit_endpoint
|
|
364
388
|
return ProxyChatCompletionsModel(
|
|
365
389
|
name="raisvc_proxy_model",
|
|
366
390
|
template_key=template_key,
|
|
367
391
|
template_parameters=template_parameters,
|
|
368
|
-
endpoint_url=
|
|
392
|
+
endpoint_url=endpoint_url,
|
|
369
393
|
token_manager=self.token_manager,
|
|
370
394
|
api_version="2023-07-01-preview",
|
|
371
395
|
max_tokens=1200,
|
|
@@ -8,13 +8,14 @@ import logging
|
|
|
8
8
|
import time
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
|
|
11
|
-
|
|
11
|
+
import base64
|
|
12
12
|
import re
|
|
13
13
|
import jinja2
|
|
14
14
|
|
|
15
15
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
16
16
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
17
17
|
from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
|
|
18
|
+
from azure.ai.evaluation._common.onedp._client import AIProjectClient
|
|
18
19
|
from .._model_tools._template_handler import TemplateParameters
|
|
19
20
|
from .constants import ConversationRole
|
|
20
21
|
|
|
@@ -145,7 +146,7 @@ class ConversationBot:
|
|
|
145
146
|
|
|
146
147
|
async def generate_response(
|
|
147
148
|
self,
|
|
148
|
-
session: AsyncHttpPipeline,
|
|
149
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
149
150
|
conversation_history: List[ConversationTurn],
|
|
150
151
|
max_history: int,
|
|
151
152
|
turn_number: int = 0,
|
|
@@ -255,7 +256,7 @@ class CallbackConversationBot(ConversationBot):
|
|
|
255
256
|
|
|
256
257
|
async def generate_response(
|
|
257
258
|
self,
|
|
258
|
-
session: AsyncHttpPipeline,
|
|
259
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
259
260
|
conversation_history: List[Any],
|
|
260
261
|
max_history: int,
|
|
261
262
|
turn_number: int = 0,
|
|
@@ -329,7 +330,7 @@ class MultiModalConversationBot(ConversationBot):
|
|
|
329
330
|
callback: Callable,
|
|
330
331
|
user_template: str,
|
|
331
332
|
user_template_parameters: TemplateParameters,
|
|
332
|
-
rai_client: RAIClient,
|
|
333
|
+
rai_client: Union[RAIClient, AIProjectClient],
|
|
333
334
|
*args,
|
|
334
335
|
**kwargs,
|
|
335
336
|
) -> None:
|
|
@@ -342,7 +343,7 @@ class MultiModalConversationBot(ConversationBot):
|
|
|
342
343
|
|
|
343
344
|
async def generate_response(
|
|
344
345
|
self,
|
|
345
|
-
session: AsyncHttpPipeline,
|
|
346
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
346
347
|
conversation_history: List[Any],
|
|
347
348
|
max_history: int,
|
|
348
349
|
turn_number: int = 0,
|
|
@@ -418,7 +419,13 @@ class MultiModalConversationBot(ConversationBot):
|
|
|
418
419
|
contents = []
|
|
419
420
|
for msg in messages:
|
|
420
421
|
if msg.startswith("image_understanding/"):
|
|
421
|
-
|
|
422
|
+
if(isinstance(self.rai_client, RAIClient)):
|
|
423
|
+
encoded_image = await self.rai_client.get_image_data(msg)
|
|
424
|
+
else:
|
|
425
|
+
response = self.rai_client.red_teams.get_template_parameters_image(path=msg, stream="true")
|
|
426
|
+
image_data = b"".join(response)
|
|
427
|
+
encoded_image = base64.b64encode(image_data).decode("utf-8")
|
|
428
|
+
|
|
422
429
|
contents.append(
|
|
423
430
|
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
|
|
424
431
|
)
|
|
@@ -11,6 +11,7 @@ from azure.ai.evaluation.simulator._constants import SupportedLanguages
|
|
|
11
11
|
from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
12
12
|
from ..._http_utils import AsyncHttpPipeline
|
|
13
13
|
from . import ConversationBot, ConversationTurn
|
|
14
|
+
from azure.ai.evaluation._common.onedp._client import AIProjectClient
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
|
|
@@ -72,7 +73,7 @@ def is_closing_message_helper(response: str) -> bool:
|
|
|
72
73
|
async def simulate_conversation(
|
|
73
74
|
*,
|
|
74
75
|
bots: List[ConversationBot],
|
|
75
|
-
session: AsyncHttpPipeline,
|
|
76
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
76
77
|
language: SupportedLanguages,
|
|
77
78
|
stopping_criteria: Callable[[str], bool] = is_closing_message,
|
|
78
79
|
turn_limit: int = 10,
|
|
@@ -5,17 +5,19 @@
|
|
|
5
5
|
# noqa: E501
|
|
6
6
|
import logging
|
|
7
7
|
from random import randint
|
|
8
|
-
from typing import Callable, Optional, cast
|
|
8
|
+
from typing import Callable, Optional, cast, Union
|
|
9
9
|
|
|
10
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
10
11
|
from azure.ai.evaluation._common._experimental import experimental
|
|
11
|
-
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
12
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
12
13
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
14
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
14
15
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
16
|
+
from azure.ai.evaluation._common.onedp._client import AIProjectClient
|
|
15
17
|
from azure.core.credentials import TokenCredential
|
|
16
18
|
|
|
17
19
|
from ._adversarial_simulator import AdversarialSimulator
|
|
18
|
-
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
20
|
+
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
@@ -42,26 +44,37 @@ class DirectAttackSimulator:
|
|
|
42
44
|
:caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
|
|
43
45
|
"""
|
|
44
46
|
|
|
45
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
47
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
46
48
|
"""Constructor."""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
self.azure_ai_project =
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
49
|
+
|
|
50
|
+
if is_onedp_project(azure_ai_project):
|
|
51
|
+
self.azure_ai_project = azure_ai_project
|
|
52
|
+
self.credential=cast(TokenCredential, credential)
|
|
53
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
54
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
55
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
56
|
+
credential=self.credential
|
|
57
|
+
)
|
|
58
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
59
|
+
else:
|
|
60
|
+
try:
|
|
61
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
62
|
+
except EvaluationException as e:
|
|
63
|
+
raise EvaluationException(
|
|
64
|
+
message=e.message,
|
|
65
|
+
internal_message=e.internal_message,
|
|
66
|
+
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
67
|
+
category=e.category,
|
|
68
|
+
blame=e.blame,
|
|
69
|
+
) from e
|
|
70
|
+
self.credential = cast(TokenCredential, credential)
|
|
71
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
72
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
73
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
74
|
+
credential=self.credential,
|
|
75
|
+
)
|
|
76
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
77
|
+
|
|
65
78
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
66
79
|
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
67
80
|
)
|
|
@@ -14,4 +14,5 @@ SUPPORTED_LANGUAGES_MAPPING = {
|
|
|
14
14
|
SupportedLanguages.SimplifiedChinese: BASE_SUFFIX.replace("__language__", "simplified chinese"),
|
|
15
15
|
SupportedLanguages.Portuguese: BASE_SUFFIX.replace("__language__", "portuguese"),
|
|
16
16
|
SupportedLanguages.Japanese: BASE_SUFFIX.replace("__language__", "japanese"),
|
|
17
|
+
SupportedLanguages.Korean: BASE_SUFFIX.replace("__language__", "korean"),
|
|
17
18
|
}
|
|
@@ -5,20 +5,22 @@
|
|
|
5
5
|
# noqa: E501
|
|
6
6
|
import asyncio
|
|
7
7
|
import logging
|
|
8
|
-
from typing import Callable, cast
|
|
8
|
+
from typing import Callable, cast, Union
|
|
9
9
|
|
|
10
10
|
from tqdm import tqdm
|
|
11
11
|
|
|
12
|
-
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
12
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
13
13
|
from azure.ai.evaluation._common._experimental import experimental
|
|
14
14
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
15
|
from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
|
|
16
16
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
17
|
+
from azure.ai.evaluation._common.onedp._client import AIProjectClient
|
|
17
18
|
from azure.core.credentials import TokenCredential
|
|
19
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
18
20
|
|
|
19
21
|
from ._adversarial_simulator import AdversarialSimulator, JsonLineList
|
|
20
22
|
|
|
21
|
-
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
23
|
+
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
22
24
|
|
|
23
25
|
logger = logging.getLogger(__name__)
|
|
24
26
|
|
|
@@ -44,30 +46,43 @@ class IndirectAttackSimulator(AdversarialSimulator):
|
|
|
44
46
|
:caption: Run the IndirectAttackSimulator to produce 1 result with 1 conversation turn (2 messages in the result).
|
|
45
47
|
"""
|
|
46
48
|
|
|
47
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
49
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
48
50
|
"""Constructor."""
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
self.azure_ai_project =
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
52
|
+
if is_onedp_project(azure_ai_project):
|
|
53
|
+
self.azure_ai_project = azure_ai_project
|
|
54
|
+
self.credential=cast(TokenCredential, credential)
|
|
55
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
56
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
57
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
58
|
+
credential=self.credential
|
|
59
|
+
)
|
|
60
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
61
|
+
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
62
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
63
|
+
)
|
|
64
|
+
else:
|
|
65
|
+
try:
|
|
66
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
67
|
+
except EvaluationException as e:
|
|
68
|
+
raise EvaluationException(
|
|
69
|
+
message=e.message,
|
|
70
|
+
internal_message=e.internal_message,
|
|
71
|
+
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
72
|
+
category=e.category,
|
|
73
|
+
blame=e.blame,
|
|
74
|
+
) from e
|
|
75
|
+
|
|
76
|
+
self.credential = cast(TokenCredential, credential)
|
|
77
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
78
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
79
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
80
|
+
credential=self.credential,
|
|
81
|
+
)
|
|
82
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
83
|
+
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
84
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
85
|
+
)
|
|
71
86
|
super().__init__(azure_ai_project=azure_ai_project, credential=credential)
|
|
72
87
|
|
|
73
88
|
def _ensure_service_dependencies(self):
|
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
"""Tooling for model evaluation"""
|
|
6
6
|
|
|
7
|
-
from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager
|
|
7
|
+
from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager
|
|
8
8
|
from ._proxy_completion_model import ProxyChatCompletionsModel
|
|
9
9
|
from ._rai_client import RAIClient
|
|
10
10
|
from ._template_handler import CONTENT_HARM_TEMPLATES_COLLECTION_KEY, AdversarialTemplateHandler
|
|
11
11
|
from .models import LLMBase, OpenAIChatCompletionsModel
|
|
12
|
+
from ..._constants import TokenScope
|
|
12
13
|
|
|
13
14
|
__all__ = [
|
|
14
15
|
"ManagedIdentityAPITokenManager",
|
|
@@ -3,12 +3,15 @@
|
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
-
from typing import Dict, List, Optional
|
|
6
|
+
from typing import Dict, List, Optional, Union
|
|
7
7
|
|
|
8
8
|
from azure.core.credentials import TokenCredential
|
|
9
9
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
10
10
|
from azure.ai.evaluation.simulator._model_tools import ManagedIdentityAPITokenManager
|
|
11
11
|
from azure.ai.evaluation._common.raiclient import MachineLearningServicesClient
|
|
12
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
13
|
+
from azure.ai.evaluation._common.utils import is_onedp_project
|
|
14
|
+
from azure.ai.evaluation._common.onedp import AIProjectClient
|
|
12
15
|
import jwt
|
|
13
16
|
import time
|
|
14
17
|
import ast
|
|
@@ -22,24 +25,27 @@ class GeneratedRAIClient:
|
|
|
22
25
|
:type token_manager: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
|
|
23
26
|
"""
|
|
24
27
|
|
|
25
|
-
def __init__(self, azure_ai_project: AzureAIProject, token_manager: ManagedIdentityAPITokenManager):
|
|
28
|
+
def __init__(self, azure_ai_project: Union[AzureAIProject, str], token_manager: ManagedIdentityAPITokenManager):
|
|
26
29
|
self.azure_ai_project = azure_ai_project
|
|
27
30
|
self.token_manager = token_manager
|
|
28
31
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
if not is_onedp_project(azure_ai_project):
|
|
33
|
+
# Service URL construction
|
|
34
|
+
if "RAI_SVC_URL" in os.environ:
|
|
35
|
+
endpoint = os.environ["RAI_SVC_URL"].rstrip("/")
|
|
36
|
+
else:
|
|
37
|
+
endpoint = self._get_service_discovery_url()
|
|
34
38
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
# Create the autogenerated client
|
|
40
|
+
self._client = MachineLearningServicesClient(
|
|
41
|
+
endpoint=endpoint,
|
|
42
|
+
subscription_id=self.azure_ai_project["subscription_id"],
|
|
43
|
+
resource_group_name=self.azure_ai_project["resource_group_name"],
|
|
44
|
+
workspace_name=self.azure_ai_project["project_name"],
|
|
45
|
+
credential=self.token_manager,
|
|
46
|
+
).rai_svc
|
|
47
|
+
else:
|
|
48
|
+
self._client = AIProjectClient(endpoint=azure_ai_project, credential=token_manager).red_teams
|
|
43
49
|
|
|
44
50
|
def _get_service_discovery_url(self):
|
|
45
51
|
"""Get the service discovery URL.
|
|
@@ -87,7 +93,7 @@ class GeneratedRAIClient:
|
|
|
87
93
|
"""
|
|
88
94
|
try:
|
|
89
95
|
# Send the request using the autogenerated client
|
|
90
|
-
response = self._client.rai_svc.get_attack_objectives(
|
|
96
|
+
response = self._client.get_attack_objectives(
|
|
91
97
|
risk_types=[risk_category],
|
|
92
98
|
lang="en",
|
|
93
99
|
strategy=strategy,
|
|
@@ -108,7 +114,7 @@ class GeneratedRAIClient:
|
|
|
108
114
|
"""
|
|
109
115
|
try:
|
|
110
116
|
# Send the request using the autogenerated client
|
|
111
|
-
response = self._client.rai_svc.get_jail_break_dataset_with_type(type="upia")
|
|
117
|
+
response = self._client.get_jail_break_dataset_with_type(type="upia")
|
|
112
118
|
if isinstance(response, list):
|
|
113
119
|
return response
|
|
114
120
|
else:
|
|
@@ -142,4 +148,4 @@ class GeneratedRAIClient:
|
|
|
142
148
|
if (exp_time - current_time) >= 300:
|
|
143
149
|
return token
|
|
144
150
|
|
|
145
|
-
return credential.get_token(
|
|
151
|
+
return credential.get_token(TokenScope.DEFAULT_AZURE_MANAGEMENT).token
|
|
@@ -8,9 +8,9 @@ import logging
|
|
|
8
8
|
import os
|
|
9
9
|
import time
|
|
10
10
|
from abc import ABC, abstractmethod
|
|
11
|
-
from enum import Enum
|
|
12
|
-
from typing import Optional, Union
|
|
11
|
+
from typing import Optional, Union, Any
|
|
13
12
|
|
|
13
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
14
14
|
from azure.core.credentials import AccessToken, TokenCredential
|
|
15
15
|
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
|
|
16
16
|
|
|
@@ -19,12 +19,6 @@ AZURE_TOKEN_REFRESH_INTERVAL = int(
|
|
|
19
19
|
) # token refresh interval in seconds
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class TokenScope(Enum):
|
|
23
|
-
"""Token scopes for Azure endpoints"""
|
|
24
|
-
|
|
25
|
-
DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
|
|
26
|
-
|
|
27
|
-
|
|
28
22
|
class APITokenManager(ABC):
|
|
29
23
|
"""Base class for managing API tokens. Subclasses should implement the get_token method.
|
|
30
24
|
|
|
@@ -83,7 +77,8 @@ class APITokenManager(ABC):
|
|
|
83
77
|
return DefaultAzureCredential()
|
|
84
78
|
|
|
85
79
|
@abstractmethod
|
|
86
|
-
def get_token(
|
|
80
|
+
def get_token(
|
|
81
|
+
self, scopes: Union[str, None] = None, claims: Union[str, None] = None, tenant_id: Union[str, None] = None, enable_cae: bool = False, **kwargs: Any) -> AccessToken:
|
|
87
82
|
"""Async method to get the API token. Subclasses should implement this method.
|
|
88
83
|
|
|
89
84
|
:return: API token
|
|
@@ -103,7 +98,7 @@ class ManagedIdentityAPITokenManager(APITokenManager):
|
|
|
103
98
|
"""API Token Manager for Azure Managed Identity
|
|
104
99
|
|
|
105
100
|
:param token_scope: Token scope for Azure endpoint
|
|
106
|
-
:type token_scope: ~azure.ai.evaluation.
|
|
101
|
+
:type token_scope: ~azure.ai.evaluation._constants.TokenScope
|
|
107
102
|
:param logger: Logger object
|
|
108
103
|
:type logger: logging.Logger
|
|
109
104
|
:keyword kwargs: Additional keyword arguments
|