azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +9 -5
- azure/ai/evaluation/_common/constants.py +4 -2
- azure/ai/evaluation/_common/math.py +18 -0
- azure/ai/evaluation/_common/rai_service.py +54 -62
- azure/ai/evaluation/_common/utils.py +201 -16
- azure/ai/evaluation/_constants.py +12 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
- azure/ai/evaluation/_evaluate/_evaluate.py +161 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
- azure/ai/evaluation/_evaluate/_utils.py +44 -25
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +33 -79
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +331 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +76 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +97 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
- azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +30 -74
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +34 -80
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +35 -83
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
- azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +25 -28
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +23 -17
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +15 -90
- azure/ai/evaluation/_exceptions.py +9 -7
- azure/ai/evaluation/_http_utils.py +203 -132
- azure/ai/evaluation/_model_configurations.py +37 -9
- azure/ai/evaluation/{_evaluators/_chat/retrieval → _vendor}/__init__.py +0 -6
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
- azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -6
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
- azure/ai/evaluation/simulator/_simulator.py +127 -117
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/METADATA +129 -43
- azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt +50 -0
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD +106 -0
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/top_level.txt +0 -0
|
@@ -6,21 +6,22 @@
|
|
|
6
6
|
import asyncio
|
|
7
7
|
import logging
|
|
8
8
|
import random
|
|
9
|
-
from typing import Any, Callable, Dict, List, Optional
|
|
9
|
+
from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
|
|
10
10
|
|
|
11
11
|
from tqdm import tqdm
|
|
12
12
|
|
|
13
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
13
14
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
14
15
|
from azure.ai.evaluation._http_utils import get_async_http_client
|
|
15
|
-
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
16
16
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
17
17
|
from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
|
|
18
|
+
from azure.core.credentials import TokenCredential
|
|
18
19
|
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
19
|
-
from azure.identity import DefaultAzureCredential
|
|
20
20
|
|
|
21
21
|
from ._constants import SupportedLanguages
|
|
22
|
-
from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
|
|
22
|
+
from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
|
|
23
23
|
from ._conversation._conversation import simulate_conversation
|
|
24
|
+
from ._helpers import experimental
|
|
24
25
|
from ._model_tools import (
|
|
25
26
|
AdversarialTemplateHandler,
|
|
26
27
|
ManagedIdentityAPITokenManager,
|
|
@@ -28,11 +29,13 @@ from ._model_tools import (
|
|
|
28
29
|
RAIClient,
|
|
29
30
|
TokenScope,
|
|
30
31
|
)
|
|
32
|
+
from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
|
|
31
33
|
from ._utils import JsonLineList
|
|
32
34
|
|
|
33
35
|
logger = logging.getLogger(__name__)
|
|
34
36
|
|
|
35
37
|
|
|
38
|
+
@experimental
|
|
36
39
|
class AdversarialSimulator:
|
|
37
40
|
"""
|
|
38
41
|
Initializes the adversarial simulator with a project scope.
|
|
@@ -44,41 +47,28 @@ class AdversarialSimulator:
|
|
|
44
47
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
45
48
|
"""
|
|
46
49
|
|
|
47
|
-
def __init__(self, *, azure_ai_project:
|
|
50
|
+
def __init__(self, *, azure_ai_project: dict, credential):
|
|
48
51
|
"""Constructor."""
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
|
|
53
|
+
try:
|
|
54
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
55
|
+
except EvaluationException as e:
|
|
52
56
|
raise EvaluationException(
|
|
53
|
-
message=
|
|
54
|
-
internal_message=
|
|
57
|
+
message=e.message,
|
|
58
|
+
internal_message=e.internal_message,
|
|
55
59
|
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
56
|
-
category=
|
|
57
|
-
blame=
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
|
|
61
|
-
msg = "subscription_id, resource_group_name and project_name cannot be None"
|
|
62
|
-
raise EvaluationException(
|
|
63
|
-
message=msg,
|
|
64
|
-
internal_message=msg,
|
|
65
|
-
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
66
|
-
category=ErrorCategory.MISSING_FIELD,
|
|
67
|
-
blame=ErrorBlame.USER_ERROR,
|
|
68
|
-
)
|
|
69
|
-
if "credential" not in azure_ai_project and not credential:
|
|
70
|
-
credential = DefaultAzureCredential()
|
|
71
|
-
elif "credential" in azure_ai_project:
|
|
72
|
-
credential = azure_ai_project["credential"]
|
|
73
|
-
self.azure_ai_project = azure_ai_project
|
|
60
|
+
category=e.category,
|
|
61
|
+
blame=e.blame,
|
|
62
|
+
) from e
|
|
63
|
+
|
|
74
64
|
self.token_manager = ManagedIdentityAPITokenManager(
|
|
75
65
|
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
76
66
|
logger=logging.getLogger("AdversarialSimulator"),
|
|
77
|
-
credential=credential,
|
|
67
|
+
credential=cast(TokenCredential, credential),
|
|
78
68
|
)
|
|
79
|
-
self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
|
|
69
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
80
70
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
81
|
-
azure_ai_project=azure_ai_project, rai_client=self.rai_client
|
|
71
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
82
72
|
)
|
|
83
73
|
|
|
84
74
|
def _ensure_service_dependencies(self):
|
|
@@ -92,7 +82,7 @@ class AdversarialSimulator:
|
|
|
92
82
|
blame=ErrorBlame.USER_ERROR,
|
|
93
83
|
)
|
|
94
84
|
|
|
95
|
-
#
|
|
85
|
+
# pylint: disable=too-many-locals
|
|
96
86
|
async def __call__(
|
|
97
87
|
self,
|
|
98
88
|
*,
|
|
@@ -106,10 +96,10 @@ class AdversarialSimulator:
|
|
|
106
96
|
api_call_retry_sleep_sec: int = 1,
|
|
107
97
|
api_call_delay_sec: int = 0,
|
|
108
98
|
concurrent_async_task: int = 3,
|
|
109
|
-
_jailbreak_type: Optional[str] = None,
|
|
110
99
|
language: SupportedLanguages = SupportedLanguages.English,
|
|
111
100
|
randomize_order: bool = True,
|
|
112
101
|
randomization_seed: Optional[int] = None,
|
|
102
|
+
**kwargs,
|
|
113
103
|
):
|
|
114
104
|
"""
|
|
115
105
|
Executes the adversarial simulation against a specified target function asynchronously.
|
|
@@ -216,6 +206,7 @@ class AdversarialSimulator:
|
|
|
216
206
|
total_tasks,
|
|
217
207
|
)
|
|
218
208
|
total_tasks = min(total_tasks, max_simulation_results)
|
|
209
|
+
_jailbreak_type = kwargs.get("_jailbreak_type", None)
|
|
219
210
|
if _jailbreak_type:
|
|
220
211
|
jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
|
|
221
212
|
progress_bar = tqdm(
|
|
@@ -263,16 +254,21 @@ class AdversarialSimulator:
|
|
|
263
254
|
|
|
264
255
|
return JsonLineList(sim_results)
|
|
265
256
|
|
|
266
|
-
def _to_chat_protocol(
|
|
257
|
+
def _to_chat_protocol(
|
|
258
|
+
self,
|
|
259
|
+
*,
|
|
260
|
+
conversation_history: List[ConversationTurn],
|
|
261
|
+
template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
|
|
262
|
+
):
|
|
267
263
|
if template_parameters is None:
|
|
268
264
|
template_parameters = {}
|
|
269
265
|
messages = []
|
|
270
266
|
for _, m in enumerate(conversation_history):
|
|
271
267
|
message = {"content": m.message, "role": m.role.value}
|
|
272
|
-
if "context" in m.full_response:
|
|
268
|
+
if m.full_response is not None and "context" in m.full_response:
|
|
273
269
|
message["context"] = m.full_response["context"]
|
|
274
270
|
messages.append(message)
|
|
275
|
-
conversation_category = template_parameters.pop("metadata", {}).get("Category")
|
|
271
|
+
conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
|
|
276
272
|
template_parameters["metadata"] = {}
|
|
277
273
|
for key in (
|
|
278
274
|
"conversation_starter",
|
|
@@ -294,14 +290,14 @@ class AdversarialSimulator:
|
|
|
294
290
|
self,
|
|
295
291
|
*,
|
|
296
292
|
target: Callable,
|
|
297
|
-
template,
|
|
298
|
-
parameters,
|
|
299
|
-
max_conversation_turns,
|
|
300
|
-
api_call_retry_limit,
|
|
301
|
-
api_call_retry_sleep_sec,
|
|
302
|
-
api_call_delay_sec,
|
|
303
|
-
language,
|
|
304
|
-
semaphore,
|
|
293
|
+
template: AdversarialTemplate,
|
|
294
|
+
parameters: TemplateParameters,
|
|
295
|
+
max_conversation_turns: int,
|
|
296
|
+
api_call_retry_limit: int,
|
|
297
|
+
api_call_retry_sleep_sec: int,
|
|
298
|
+
api_call_delay_sec: int,
|
|
299
|
+
language: SupportedLanguages,
|
|
300
|
+
semaphore: asyncio.Semaphore,
|
|
305
301
|
) -> List[Dict]:
|
|
306
302
|
user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
|
|
307
303
|
system_bot = self._setup_bot(
|
|
@@ -324,9 +320,15 @@ class AdversarialSimulator:
|
|
|
324
320
|
api_call_delay_sec=api_call_delay_sec,
|
|
325
321
|
language=language,
|
|
326
322
|
)
|
|
327
|
-
return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)
|
|
328
323
|
|
|
329
|
-
|
|
324
|
+
return self._to_chat_protocol(
|
|
325
|
+
conversation_history=conversation_history,
|
|
326
|
+
template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
def _get_user_proxy_completion_model(
|
|
330
|
+
self, template_key: str, template_parameters: TemplateParameters
|
|
331
|
+
) -> ProxyChatCompletionsModel:
|
|
330
332
|
return ProxyChatCompletionsModel(
|
|
331
333
|
name="raisvc_proxy_model",
|
|
332
334
|
template_key=template_key,
|
|
@@ -338,8 +340,15 @@ class AdversarialSimulator:
|
|
|
338
340
|
temperature=0.0,
|
|
339
341
|
)
|
|
340
342
|
|
|
341
|
-
def _setup_bot(
|
|
342
|
-
|
|
343
|
+
def _setup_bot(
|
|
344
|
+
self,
|
|
345
|
+
*,
|
|
346
|
+
role: ConversationRole,
|
|
347
|
+
template: AdversarialTemplate,
|
|
348
|
+
parameters: TemplateParameters,
|
|
349
|
+
target: Optional[Callable] = None,
|
|
350
|
+
) -> ConversationBot:
|
|
351
|
+
if role is ConversationRole.USER:
|
|
343
352
|
model = self._get_user_proxy_completion_model(
|
|
344
353
|
template_key=template.template_name, template_parameters=parameters
|
|
345
354
|
)
|
|
@@ -350,30 +359,46 @@ class AdversarialSimulator:
|
|
|
350
359
|
instantiation_parameters=parameters,
|
|
351
360
|
)
|
|
352
361
|
|
|
353
|
-
if role
|
|
362
|
+
if role is ConversationRole.ASSISTANT:
|
|
363
|
+
if target is None:
|
|
364
|
+
msg = "Cannot setup system bot. Target is None"
|
|
354
365
|
|
|
355
|
-
|
|
356
|
-
|
|
366
|
+
raise EvaluationException(
|
|
367
|
+
message=msg,
|
|
368
|
+
internal_message=msg,
|
|
369
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
370
|
+
error_category=ErrorCategory.INVALID_VALUE,
|
|
371
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
372
|
+
)
|
|
373
|
+
|
|
374
|
+
class DummyModel:
|
|
375
|
+
def __init__(self):
|
|
376
|
+
self.name = "dummy_model"
|
|
377
|
+
|
|
378
|
+
def __call__(self) -> None:
|
|
379
|
+
pass
|
|
357
380
|
|
|
358
|
-
dummy_model.name = "dummy_model"
|
|
359
381
|
return CallbackConversationBot(
|
|
360
382
|
callback=target,
|
|
361
383
|
role=role,
|
|
362
|
-
model=
|
|
384
|
+
model=DummyModel(),
|
|
363
385
|
user_template=str(template),
|
|
364
386
|
user_template_parameters=parameters,
|
|
365
387
|
conversation_template="",
|
|
366
388
|
instantiation_parameters={},
|
|
367
389
|
)
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
390
|
+
|
|
391
|
+
msg = "Invalid value for enum ConversationRole. This should never happen."
|
|
392
|
+
raise EvaluationException(
|
|
393
|
+
message=msg,
|
|
394
|
+
internal_message=msg,
|
|
395
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
396
|
+
category=ErrorCategory.INVALID_VALUE,
|
|
397
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
373
398
|
)
|
|
374
399
|
|
|
375
|
-
def _join_conversation_starter(self, parameters, to_join):
|
|
376
|
-
key = "conversation_starter"
|
|
400
|
+
def _join_conversation_starter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
|
|
401
|
+
key: Literal["conversation_starter"] = "conversation_starter"
|
|
377
402
|
if key in parameters.keys():
|
|
378
403
|
parameters[key] = f"{to_join} {parameters[key]}"
|
|
379
404
|
else:
|
|
@@ -7,7 +7,7 @@ import copy
|
|
|
7
7
|
import logging
|
|
8
8
|
import time
|
|
9
9
|
from dataclasses import dataclass
|
|
10
|
-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
10
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
|
|
11
11
|
|
|
12
12
|
import jinja2
|
|
13
13
|
|
|
@@ -15,6 +15,7 @@ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarg
|
|
|
15
15
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
16
16
|
|
|
17
17
|
from .._model_tools import LLMBase, OpenAIChatCompletionsModel
|
|
18
|
+
from .._model_tools._template_handler import TemplateParameters
|
|
18
19
|
from .constants import ConversationRole
|
|
19
20
|
|
|
20
21
|
|
|
@@ -40,7 +41,7 @@ class ConversationTurn:
|
|
|
40
41
|
role: "ConversationRole"
|
|
41
42
|
name: Optional[str] = None
|
|
42
43
|
message: str = ""
|
|
43
|
-
full_response: Optional[Any] = None
|
|
44
|
+
full_response: Optional[Dict[str, Any]] = None
|
|
44
45
|
request: Optional[Any] = None
|
|
45
46
|
|
|
46
47
|
def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
|
|
@@ -109,7 +110,7 @@ class ConversationBot:
|
|
|
109
110
|
role: ConversationRole,
|
|
110
111
|
model: Union[LLMBase, OpenAIChatCompletionsModel],
|
|
111
112
|
conversation_template: str,
|
|
112
|
-
instantiation_parameters:
|
|
113
|
+
instantiation_parameters: TemplateParameters,
|
|
113
114
|
) -> None:
|
|
114
115
|
self.role = role
|
|
115
116
|
self.conversation_template_orig = conversation_template
|
|
@@ -118,13 +119,13 @@ class ConversationBot:
|
|
|
118
119
|
)
|
|
119
120
|
self.persona_template_args = instantiation_parameters
|
|
120
121
|
if self.role == ConversationRole.USER:
|
|
121
|
-
self.name = self.persona_template_args.get("name", role.value)
|
|
122
|
+
self.name: str = cast(str, self.persona_template_args.get("name", role.value))
|
|
122
123
|
else:
|
|
123
|
-
self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
|
|
124
|
+
self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
|
|
124
125
|
self.model = model
|
|
125
126
|
|
|
126
127
|
self.logger = logging.getLogger(repr(self))
|
|
127
|
-
self.conversation_starter
|
|
128
|
+
self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
|
|
128
129
|
if role == ConversationRole.USER:
|
|
129
130
|
if "conversation_starter" in self.persona_template_args:
|
|
130
131
|
conversation_starter_content = self.persona_template_args["conversation_starter"]
|
|
@@ -148,7 +149,7 @@ class ConversationBot:
|
|
|
148
149
|
conversation_history: List[ConversationTurn],
|
|
149
150
|
max_history: int,
|
|
150
151
|
turn_number: int = 0,
|
|
151
|
-
) -> Tuple[dict, dict,
|
|
152
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
152
153
|
"""
|
|
153
154
|
Prompt the ConversationBot for a response.
|
|
154
155
|
|
|
@@ -161,7 +162,7 @@ class ConversationBot:
|
|
|
161
162
|
:param turn_number: The index of the current turn; 0 denotes the first turn of the conversation.
|
|
162
163
|
:type turn_number: int
|
|
163
164
|
:return: The response from the ConversationBot.
|
|
164
|
-
:rtype: Tuple[dict, dict,
|
|
165
|
+
:rtype: Tuple[dict, dict, float, dict]
|
|
165
166
|
"""
|
|
166
167
|
|
|
167
168
|
# check if this is the first turn and the conversation_starter is not None,
|
|
@@ -169,11 +170,11 @@ class ConversationBot:
|
|
|
169
170
|
if turn_number == 0 and self.conversation_starter is not None:
|
|
170
171
|
# if conversation_starter is a dictionary, pass it into samples as is
|
|
171
172
|
if isinstance(self.conversation_starter, dict):
|
|
172
|
-
samples = [self.conversation_starter]
|
|
173
|
+
samples: List[Union[str, jinja2.Template, Dict]] = [self.conversation_starter]
|
|
173
174
|
if isinstance(self.conversation_starter, jinja2.Template):
|
|
174
175
|
samples = [self.conversation_starter.render(**self.persona_template_args)]
|
|
175
176
|
else:
|
|
176
|
-
samples = [self.conversation_starter]
|
|
177
|
+
samples = [self.conversation_starter]
|
|
177
178
|
time_taken = 0
|
|
178
179
|
|
|
179
180
|
finish_reason = ["stop"]
|
|
@@ -238,7 +239,7 @@ class CallbackConversationBot(ConversationBot):
|
|
|
238
239
|
self,
|
|
239
240
|
callback: Callable,
|
|
240
241
|
user_template: str,
|
|
241
|
-
user_template_parameters:
|
|
242
|
+
user_template_parameters: TemplateParameters,
|
|
242
243
|
*args,
|
|
243
244
|
**kwargs,
|
|
244
245
|
) -> None:
|
|
@@ -254,7 +255,7 @@ class CallbackConversationBot(ConversationBot):
|
|
|
254
255
|
conversation_history: List[Any],
|
|
255
256
|
max_history: int,
|
|
256
257
|
turn_number: int = 0,
|
|
257
|
-
) -> Tuple[dict, dict,
|
|
258
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
258
259
|
chat_protocol_message = self._to_chat_protocol(
|
|
259
260
|
self.user_template, conversation_history, self.user_template_parameters
|
|
260
261
|
)
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import logging
|
|
7
|
-
from typing import Callable, Dict, List, Tuple, Union
|
|
7
|
+
from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
8
8
|
|
|
9
9
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
10
10
|
from azure.ai.evaluation.simulator._constants import SupportedLanguages
|
|
@@ -80,7 +80,7 @@ async def simulate_conversation(
|
|
|
80
80
|
history_limit: int = 5,
|
|
81
81
|
api_call_delay_sec: float = 0,
|
|
82
82
|
logger: logging.Logger = logging.getLogger(__name__),
|
|
83
|
-
) -> Tuple:
|
|
83
|
+
) -> Tuple[Optional[str], List[ConversationTurn]]:
|
|
84
84
|
"""
|
|
85
85
|
Simulate a conversation between the given bots.
|
|
86
86
|
|
|
@@ -99,7 +99,7 @@ async def simulate_conversation(
|
|
|
99
99
|
:keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
|
|
100
100
|
:paramtype logger: logging.Logger
|
|
101
101
|
:return: The conversation ID, if any, and the simulated conversation history between the given bots.
|
|
102
|
-
:rtype: Tuple
|
|
102
|
+
:rtype: Tuple[Optional[str], List[ConversationTurn]]
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
105
|
# Read the first prompt.
|
|
@@ -110,7 +110,7 @@ async def simulate_conversation(
|
|
|
110
110
|
turn_number=0,
|
|
111
111
|
)
|
|
112
112
|
if "id" in first_response:
|
|
113
|
-
conversation_id = first_response["id"]
|
|
113
|
+
conversation_id: Optional[str] = first_response["id"]
|
|
114
114
|
else:
|
|
115
115
|
conversation_id = None
|
|
116
116
|
first_prompt = first_response["samples"][0]
|
|
@@ -1,54 +1,25 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
+
# pylint: disable=C0301,C0114,R0913,R0903
|
|
4
5
|
# noqa: E501
|
|
5
|
-
import functools
|
|
6
6
|
import logging
|
|
7
7
|
from random import randint
|
|
8
|
-
from typing import Callable, Optional
|
|
9
|
-
|
|
10
|
-
from promptflow._sdk._telemetry import ActivityType, monitor_operation
|
|
8
|
+
from typing import Callable, Optional, cast
|
|
11
9
|
|
|
10
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
12
11
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
|
-
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
14
12
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
15
|
-
from azure.
|
|
13
|
+
from azure.core.credentials import TokenCredential
|
|
16
14
|
|
|
17
15
|
from ._adversarial_simulator import AdversarialSimulator
|
|
16
|
+
from ._helpers import experimental
|
|
18
17
|
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
|
|
19
18
|
|
|
20
19
|
logger = logging.getLogger(__name__)
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
"""Decorator to monitor adversarial scenario.
|
|
25
|
-
|
|
26
|
-
:param func: The function to be decorated.
|
|
27
|
-
:type func: Callable
|
|
28
|
-
:return: The decorated function.
|
|
29
|
-
:rtype: Callable
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
@functools.wraps(func)
|
|
33
|
-
def wrapper(*args, **kwargs):
|
|
34
|
-
scenario = str(kwargs.get("scenario", None))
|
|
35
|
-
max_conversation_turns = kwargs.get("max_conversation_turns", None)
|
|
36
|
-
max_simulation_results = kwargs.get("max_simulation_results", None)
|
|
37
|
-
decorated_func = monitor_operation(
|
|
38
|
-
activity_name="jailbreak.adversarial.simulator.call",
|
|
39
|
-
activity_type=ActivityType.PUBLICAPI,
|
|
40
|
-
custom_dimensions={
|
|
41
|
-
"scenario": scenario,
|
|
42
|
-
"max_conversation_turns": max_conversation_turns,
|
|
43
|
-
"max_simulation_results": max_simulation_results,
|
|
44
|
-
},
|
|
45
|
-
)(func)
|
|
46
|
-
|
|
47
|
-
return decorated_func(*args, **kwargs)
|
|
48
|
-
|
|
49
|
-
return wrapper
|
|
50
|
-
|
|
51
|
-
|
|
22
|
+
@experimental
|
|
52
23
|
class DirectAttackSimulator:
|
|
53
24
|
"""
|
|
54
25
|
Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
|
|
@@ -61,42 +32,28 @@ class DirectAttackSimulator:
|
|
|
61
32
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
62
33
|
"""
|
|
63
34
|
|
|
64
|
-
def __init__(self, *, azure_ai_project:
|
|
35
|
+
def __init__(self, *, azure_ai_project: dict, credential):
|
|
65
36
|
"""Constructor."""
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
message=msg,
|
|
71
|
-
internal_message=msg,
|
|
72
|
-
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
73
|
-
category=ErrorCategory.MISSING_FIELD,
|
|
74
|
-
blame=ErrorBlame.USER_ERROR,
|
|
75
|
-
)
|
|
76
|
-
# check the value of the keys in azure_ai_project is not none
|
|
77
|
-
if not all(azure_ai_project[key] for key in ["subscription_id", "resource_group_name", "project_name"]):
|
|
78
|
-
msg = "subscription_id, resource_group_name and project_name keys cannot be None"
|
|
37
|
+
|
|
38
|
+
try:
|
|
39
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
40
|
+
except EvaluationException as e:
|
|
79
41
|
raise EvaluationException(
|
|
80
|
-
message=
|
|
81
|
-
internal_message=
|
|
42
|
+
message=e.message,
|
|
43
|
+
internal_message=e.internal_message,
|
|
82
44
|
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
83
|
-
category=
|
|
84
|
-
blame=
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
credential = DefaultAzureCredential()
|
|
88
|
-
elif "credential" in azure_ai_project:
|
|
89
|
-
credential = azure_ai_project["credential"]
|
|
90
|
-
self.credential = credential
|
|
91
|
-
self.azure_ai_project = azure_ai_project
|
|
45
|
+
category=e.category,
|
|
46
|
+
blame=e.blame,
|
|
47
|
+
) from e
|
|
48
|
+
self.credential = cast(TokenCredential, credential)
|
|
92
49
|
self.token_manager = ManagedIdentityAPITokenManager(
|
|
93
50
|
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
94
51
|
logger=logging.getLogger("AdversarialSimulator"),
|
|
95
|
-
credential=credential,
|
|
52
|
+
credential=self.credential,
|
|
96
53
|
)
|
|
97
|
-
self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
|
|
54
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
98
55
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
99
|
-
azure_ai_project=azure_ai_project, rai_client=self.rai_client
|
|
56
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
100
57
|
)
|
|
101
58
|
|
|
102
59
|
def _ensure_service_dependencies(self):
|
|
@@ -110,7 +67,6 @@ class DirectAttackSimulator:
|
|
|
110
67
|
blame=ErrorBlame.USER_ERROR,
|
|
111
68
|
)
|
|
112
69
|
|
|
113
|
-
# @monitor_adversarial_scenario
|
|
114
70
|
async def __call__(
|
|
115
71
|
self,
|
|
116
72
|
*,
|
|
@@ -222,7 +178,9 @@ class DirectAttackSimulator:
|
|
|
222
178
|
if not randomization_seed:
|
|
223
179
|
randomization_seed = randint(0, 1000000)
|
|
224
180
|
|
|
225
|
-
regular_sim = AdversarialSimulator(
|
|
181
|
+
regular_sim = AdversarialSimulator(
|
|
182
|
+
azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential
|
|
183
|
+
)
|
|
226
184
|
regular_sim_results = await regular_sim(
|
|
227
185
|
scenario=scenario,
|
|
228
186
|
target=target,
|
|
@@ -235,7 +193,7 @@ class DirectAttackSimulator:
|
|
|
235
193
|
randomize_order=True,
|
|
236
194
|
randomization_seed=randomization_seed,
|
|
237
195
|
)
|
|
238
|
-
jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
|
|
196
|
+
jb_sim = AdversarialSimulator(azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential)
|
|
239
197
|
jb_sim_results = await jb_sim(
|
|
240
198
|
scenario=scenario,
|
|
241
199
|
target=target,
|
|
@@ -6,9 +6,9 @@ import functools
|
|
|
6
6
|
import inspect
|
|
7
7
|
import logging
|
|
8
8
|
import sys
|
|
9
|
-
from typing import Callable, Type, TypeVar, Union
|
|
9
|
+
from typing import Callable, Type, TypeVar, Union, overload
|
|
10
10
|
|
|
11
|
-
from typing_extensions import ParamSpec
|
|
11
|
+
from typing_extensions import ParamSpec, TypeGuard
|
|
12
12
|
|
|
13
13
|
DOCSTRING_TEMPLATE = ".. note:: {0} {1}\n\n"
|
|
14
14
|
DOCSTRING_DEFAULT_INDENTATION = 8
|
|
@@ -22,20 +22,31 @@ EXPERIMENTAL_LINK_MESSAGE = (
|
|
|
22
22
|
_warning_cache = set()
|
|
23
23
|
module_logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
25
|
-
TExperimental = TypeVar("TExperimental", bound=Union[Type, Callable])
|
|
26
25
|
P = ParamSpec("P")
|
|
27
26
|
T = TypeVar("T")
|
|
28
27
|
|
|
29
28
|
|
|
30
|
-
|
|
29
|
+
@overload
|
|
30
|
+
def experimental(wrapped: Type[T]) -> Type[T]: ...
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@overload
|
|
34
|
+
def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ...
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]:
|
|
31
38
|
"""Add experimental tag to a class or a method.
|
|
32
39
|
|
|
33
40
|
:param wrapped: Either a Class or Function to mark as experimental
|
|
34
|
-
:type wrapped:
|
|
41
|
+
:type wrapped: Union[Type[T], Callable[P, T]]
|
|
35
42
|
:return: The wrapped class or method
|
|
36
|
-
:rtype:
|
|
43
|
+
:rtype: Union[Type[T], Callable[P, T]]
|
|
37
44
|
"""
|
|
38
|
-
|
|
45
|
+
|
|
46
|
+
def is_class(t: Union[Type[T], Callable[P, T]]) -> TypeGuard[Type[T]]:
|
|
47
|
+
return isinstance(t, type)
|
|
48
|
+
|
|
49
|
+
if is_class(wrapped):
|
|
39
50
|
return _add_class_docstring(wrapped)
|
|
40
51
|
if inspect.isfunction(wrapped):
|
|
41
52
|
return _add_method_docstring(wrapped)
|
|
@@ -74,11 +85,11 @@ def _add_class_docstring(cls: Type[T]) -> Type[T]:
|
|
|
74
85
|
cls.__doc__ = _add_note_to_docstring(cls.__doc__, doc_string)
|
|
75
86
|
else:
|
|
76
87
|
cls.__doc__ = doc_string + ">"
|
|
77
|
-
cls.__init__ = _add_class_warning(cls.__init__)
|
|
88
|
+
cls.__init__ = _add_class_warning(cls.__init__) # type: ignore[method-assign]
|
|
78
89
|
return cls
|
|
79
90
|
|
|
80
91
|
|
|
81
|
-
def _add_method_docstring(func: Callable[P, T]
|
|
92
|
+
def _add_method_docstring(func: Callable[P, T]) -> Callable[P, T]:
|
|
82
93
|
"""Add experimental tag to the method doc string.
|
|
83
94
|
|
|
84
95
|
:param func: The function to update
|
|
@@ -18,7 +18,7 @@ class Turn:
|
|
|
18
18
|
|
|
19
19
|
role: Union[str, ConversationRole]
|
|
20
20
|
content: str
|
|
21
|
-
context: str = None
|
|
21
|
+
context: Optional[str] = None
|
|
22
22
|
|
|
23
23
|
def to_dict(self) -> Dict[str, Optional[str]]:
|
|
24
24
|
"""
|
|
@@ -42,13 +42,13 @@ class ConversationHistory:
|
|
|
42
42
|
Conversation history class to keep track of the conversation turns in a conversation.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
def __init__(self):
|
|
45
|
+
def __init__(self) -> None:
|
|
46
46
|
"""
|
|
47
47
|
Initializes the conversation history with an empty list of turns.
|
|
48
48
|
"""
|
|
49
49
|
self.history: List[Turn] = []
|
|
50
50
|
|
|
51
|
-
def add_to_history(self, turn: Turn):
|
|
51
|
+
def add_to_history(self, turn: Turn) -> None:
|
|
52
52
|
"""
|
|
53
53
|
Adds a turn to the conversation history.
|
|
54
54
|
|
|
@@ -57,7 +57,7 @@ class ConversationHistory:
|
|
|
57
57
|
"""
|
|
58
58
|
self.history.append(turn)
|
|
59
59
|
|
|
60
|
-
def to_list(self) -> List[Dict[str, str]]:
|
|
60
|
+
def to_list(self) -> List[Dict[str, Optional[str]]]:
|
|
61
61
|
"""
|
|
62
62
|
Converts the conversation history to a list of dictionaries.
|
|
63
63
|
|