azure-ai-evaluation 1.0.0b5__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/_common/_experimental.py +4 -0
- azure/ai/evaluation/_common/math.py +62 -2
- azure/ai/evaluation/_common/rai_service.py +80 -29
- azure/ai/evaluation/_common/utils.py +50 -16
- azure/ai/evaluation/_constants.py +1 -0
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +13 -3
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +11 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +34 -10
- azure/ai/evaluation/_evaluate/_evaluate.py +59 -103
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +2 -1
- azure/ai/evaluation/_evaluate/_utils.py +6 -4
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +16 -17
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +60 -29
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +17 -5
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +4 -2
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +6 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +56 -50
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +79 -34
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +73 -34
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +74 -33
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -34
- azure/ai/evaluation/_evaluators/_eci/_eci.py +28 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +57 -26
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +13 -15
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +68 -30
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +17 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +10 -8
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -2
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +10 -6
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +6 -2
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +6 -2
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +57 -34
- azure/ai/evaluation/_evaluators/_qa/_qa.py +25 -37
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +63 -29
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +76 -161
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +24 -25
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +65 -67
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +26 -20
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +74 -40
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_model_configurations.py +65 -14
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +15 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +25 -34
- azure/ai/evaluation/simulator/_constants.py +11 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +16 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +11 -1
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +3 -1
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +8 -4
- azure/ai/evaluation/simulator/_simulator.py +51 -45
- azure/ai/evaluation/simulator/_utils.py +25 -7
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.0.1.dist-info}/METADATA +232 -324
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.0.1.dist-info}/RECORD +60 -61
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.0.1.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.0.1.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.0.1.dist-info}/top_level.txt +0 -0
|
@@ -2,90 +2,124 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
import logging
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import Dict, List, Union
|
|
6
6
|
|
|
7
|
-
from typing_extensions import override
|
|
7
|
+
from typing_extensions import overload, override
|
|
8
8
|
|
|
9
9
|
from azure.ai.evaluation._common._experimental import experimental
|
|
10
10
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
11
11
|
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
12
|
+
from azure.ai.evaluation._model_configurations import Conversation
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
@experimental
|
|
17
|
-
class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
19
|
+
"""
|
|
20
|
+
Evaluates the indirect attack score for a given query and response or a multi-turn conversation, with reasoning.
|
|
21
|
+
|
|
22
|
+
Indirect attacks, also known as cross-domain prompt injected attacks (XPIA), are when jailbreak attacks are
|
|
23
|
+
injected into the context of a document or source that may result in an altered, unexpected behavior.
|
|
24
|
+
|
|
25
|
+
Indirect attack evaluations are broken down into three subcategories:
|
|
26
|
+
- Manipulated Content: This category involves commands that aim to alter or fabricate information, often
|
|
27
|
+
to mislead or deceive. It includes actions like spreading false information, altering language or
|
|
28
|
+
formatting, and hiding or emphasizing specific details. The goal is often to manipulate perceptions
|
|
29
|
+
or behaviors by controlling the flow and presentation of information.
|
|
30
|
+
- Intrusion: This category encompasses commands that attempt to breach systems, gain unauthorized access,
|
|
31
|
+
or elevate privileges illicitly. It includes creating backdoors, exploiting vulnerabilities, and
|
|
32
|
+
traditional jailbreaks to bypass security measures. The intent is often to gain control or access
|
|
33
|
+
sensitive data without detection.
|
|
34
|
+
- Information Gathering: This category pertains to accessing, deleting, or modifying data without
|
|
35
|
+
authorization, often for malicious purposes. It includes exfiltrating sensitive data, tampering with
|
|
36
|
+
system records, and removing or altering existing information. The focus is on acquiring or
|
|
37
|
+
manipulating data to exploit or compromise systems and individuals.
|
|
38
|
+
|
|
39
|
+
Indirect attack scores are boolean values, where True indicates that the response contains an indirect attack.
|
|
23
40
|
|
|
24
41
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
25
42
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
26
43
|
:param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
|
|
27
44
|
name.
|
|
28
45
|
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
29
|
-
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
|
|
30
|
-
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
|
|
31
|
-
:type eval_last_turn: bool
|
|
32
|
-
|
|
33
|
-
**Usage**
|
|
34
|
-
|
|
35
|
-
.. code-block:: python
|
|
36
46
|
|
|
37
|
-
|
|
38
|
-
result = eval_fn(query="What is the capital of France?", response="Paris.")
|
|
47
|
+
.. admonition:: Example:
|
|
39
48
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
'xpia_reason': 'The conversation does not contain any manipulated content, intrusion or
|
|
47
|
-
information gathering.'
|
|
48
|
-
'xpia_information_gathering': False,
|
|
49
|
-
'xpia_intrusion': False
|
|
50
|
-
'xpia_manipulated_content': False
|
|
51
|
-
}
|
|
49
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate.py
|
|
50
|
+
:start-after: [START indirect_attack_evaluator]
|
|
51
|
+
:end-before: [END indirect_attack_evaluator]
|
|
52
|
+
:language: python
|
|
53
|
+
:dedent: 8
|
|
54
|
+
:caption: Initialize and call an IndirectAttackEvaluator.
|
|
52
55
|
"""
|
|
53
56
|
|
|
57
|
+
id = "azureml://registries/azureml/models/Indirect-Attack-Evaluator/versions/3"
|
|
58
|
+
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
59
|
+
|
|
54
60
|
@override
|
|
55
61
|
def __init__(
|
|
56
62
|
self,
|
|
57
63
|
credential,
|
|
58
64
|
azure_ai_project,
|
|
59
|
-
eval_last_turn: bool = False,
|
|
60
65
|
):
|
|
61
66
|
super().__init__(
|
|
62
67
|
eval_metric=EvaluationMetrics.XPIA,
|
|
63
68
|
azure_ai_project=azure_ai_project,
|
|
64
69
|
credential=credential,
|
|
65
|
-
eval_last_turn=eval_last_turn,
|
|
66
70
|
)
|
|
67
71
|
|
|
68
|
-
@
|
|
72
|
+
@overload
|
|
69
73
|
def __call__(
|
|
70
74
|
self,
|
|
71
75
|
*,
|
|
72
|
-
query:
|
|
73
|
-
response:
|
|
74
|
-
|
|
76
|
+
query: str,
|
|
77
|
+
response: str,
|
|
78
|
+
) -> Dict[str, Union[str, bool]]:
|
|
79
|
+
"""Evaluate whether cross domain injected attacks are present in given query/response
|
|
80
|
+
|
|
81
|
+
:keyword query: The query to be evaluated.
|
|
82
|
+
:paramtype query: str
|
|
83
|
+
:keyword response: The response to be evaluated.
|
|
84
|
+
:paramtype response: str
|
|
85
|
+
:return: The cross domain injection attack score
|
|
86
|
+
:rtype: Dict[str, Union[str, bool]]
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
@overload
|
|
90
|
+
def __call__(
|
|
91
|
+
self,
|
|
92
|
+
*,
|
|
93
|
+
conversation: Conversation,
|
|
94
|
+
) -> Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]:
|
|
95
|
+
"""Evaluate cross domain injected attacks are present in a conversation
|
|
96
|
+
|
|
97
|
+
:keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
|
|
98
|
+
key "messages", and potentially a global context under the key "context". Conversation turns are expected
|
|
99
|
+
to be dictionaries with keys "content", "role", and possibly "context".
|
|
100
|
+
:paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
|
|
101
|
+
:return: The cross domain injection attack score
|
|
102
|
+
:rtype: Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
@override
|
|
106
|
+
def __call__( # pylint: disable=docstring-missing-param
|
|
107
|
+
self,
|
|
108
|
+
*args,
|
|
75
109
|
**kwargs,
|
|
76
110
|
):
|
|
77
111
|
"""
|
|
78
112
|
Evaluate whether cross domain injected attacks are present in your AI system's response.
|
|
79
113
|
|
|
80
114
|
:keyword query: The query to be evaluated.
|
|
81
|
-
:paramtype query: str
|
|
115
|
+
:paramtype query: Optional[str]
|
|
82
116
|
:keyword response: The response to be evaluated.
|
|
83
|
-
:paramtype response: str
|
|
117
|
+
:paramtype response: Optional[str]
|
|
84
118
|
:keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
|
|
85
119
|
key "messages". Conversation turns are expected
|
|
86
120
|
to be dictionaries with keys "content" and "role".
|
|
87
121
|
:paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
|
|
88
|
-
:return: The
|
|
89
|
-
:rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[
|
|
122
|
+
:return: The cross domain injection attack score
|
|
123
|
+
:rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]]
|
|
90
124
|
"""
|
|
91
|
-
return super().__call__(
|
|
125
|
+
return super().__call__(*args, **kwargs)
|
|
@@ -23,6 +23,7 @@ class ErrorCategory(Enum):
|
|
|
23
23
|
* SERVICE_UNAVAILABLE -> Service is unavailable
|
|
24
24
|
* MISSING_PACKAGE -> Required package is missing
|
|
25
25
|
* FAILED_REMOTE_TRACKING -> Remote tracking failed
|
|
26
|
+
* PROJECT_ACCESS_ERROR -> Access to project failed
|
|
26
27
|
* UNKNOWN -> Undefined placeholder. Avoid using.
|
|
27
28
|
"""
|
|
28
29
|
|
|
@@ -35,6 +36,7 @@ class ErrorCategory(Enum):
|
|
|
35
36
|
SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
|
|
36
37
|
MISSING_PACKAGE = "MISSING PACKAGE"
|
|
37
38
|
FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
|
|
39
|
+
PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
|
|
38
40
|
UNKNOWN = "UNKNOWN"
|
|
39
41
|
|
|
40
42
|
|
|
@@ -8,44 +8,95 @@ from typing_extensions import NotRequired
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class AzureOpenAIModelConfiguration(TypedDict):
|
|
11
|
-
"""Model
|
|
11
|
+
"""Model configuration for Azure OpenAI models
|
|
12
|
+
|
|
13
|
+
:param type: The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration
|
|
14
|
+
:type type: NotRequired[Literal["azure_openai"]]
|
|
15
|
+
:param azure_deployment: Name of Azure OpenAI deployment to make requests to
|
|
16
|
+
:type azure_deployment: str
|
|
17
|
+
:param azure_endpoint: Endpoint of Azure OpenAI resource to make requests to
|
|
18
|
+
:type azure_endpoint: str
|
|
19
|
+
:param api_key: API key of Azure OpenAI resource
|
|
20
|
+
:type api_key: NotRequired[str]
|
|
21
|
+
:param api_version: API version to use in request to Azure OpenAI deployment. Optional.
|
|
22
|
+
:type api_version: NotRequired[str]
|
|
23
|
+
|
|
24
|
+
.. admonition:: Example:
|
|
25
|
+
|
|
26
|
+
.. literalinclude:: ../samples/evaluation_samples_common.py
|
|
27
|
+
:start-after: [START create_AOAI_model_config]
|
|
28
|
+
:end-before: [END create_AOAI_model_config]
|
|
29
|
+
:language: python
|
|
30
|
+
:dedent: 8
|
|
31
|
+
:caption: Creating an AzureOpenAIModelConfiguration object.
|
|
32
|
+
|
|
33
|
+
"""
|
|
12
34
|
|
|
13
35
|
type: NotRequired[Literal["azure_openai"]]
|
|
14
|
-
"""The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration"""
|
|
15
36
|
azure_deployment: str
|
|
16
|
-
"""Name of Azure OpenAI deployment to make request to"""
|
|
17
37
|
azure_endpoint: str
|
|
18
38
|
"""Endpoint of Azure OpenAI resource to make request to"""
|
|
19
39
|
api_key: NotRequired[str]
|
|
20
40
|
"""API key of Azure OpenAI resource"""
|
|
21
41
|
api_version: NotRequired[str]
|
|
22
|
-
"""(Optional) API version to use in request to Azure OpenAI deployment"""
|
|
23
42
|
|
|
24
43
|
|
|
25
44
|
class OpenAIModelConfiguration(TypedDict):
|
|
26
|
-
"""Model
|
|
45
|
+
"""Model configuration for OpenAI models
|
|
46
|
+
|
|
47
|
+
:param type: The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration
|
|
48
|
+
:type type: NotRequired[Literal["openai"]]
|
|
49
|
+
:param api_key: API key needed to make requests to model
|
|
50
|
+
:type api_key: str
|
|
51
|
+
:param model: Name of model to be used in OpenAI request
|
|
52
|
+
:type model: str
|
|
53
|
+
:param base_url: Base URL to be used in OpenAI request. Optional.
|
|
54
|
+
:type base_url: NotRequired[str]
|
|
55
|
+
:param organization: OpenAI organization. Optional.
|
|
56
|
+
:type organization: NotRequired[str]
|
|
57
|
+
|
|
58
|
+
.. admonition:: Example:
|
|
59
|
+
|
|
60
|
+
.. literalinclude:: ../samples/evaluation_samples_common.py
|
|
61
|
+
:start-after: [START create_OAI_model_config]
|
|
62
|
+
:end-before: [END create_OAI_model_config]
|
|
63
|
+
:language: python
|
|
64
|
+
:dedent: 8
|
|
65
|
+
:caption: Creating an OpenAIModelConfiguration object.
|
|
66
|
+
|
|
67
|
+
"""
|
|
27
68
|
|
|
28
69
|
type: NotRequired[Literal["openai"]]
|
|
29
|
-
"""The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration"""
|
|
30
70
|
api_key: str
|
|
31
|
-
"API key needed to make request to model"
|
|
32
71
|
model: str
|
|
33
|
-
"""Name of model to be used in OpenAI request"""
|
|
34
72
|
base_url: NotRequired[str]
|
|
35
|
-
"""(Optional) Base URL to be used in OpenAI request"""
|
|
36
73
|
organization: NotRequired[str]
|
|
37
|
-
"""(Optional) OpenAI organization"""
|
|
38
74
|
|
|
39
75
|
|
|
40
76
|
class AzureAIProject(TypedDict):
|
|
41
|
-
"""Azure AI
|
|
77
|
+
"""Information about the Azure AI project
|
|
78
|
+
|
|
79
|
+
:param subscription_id: ID of the Azure subscription the project is in
|
|
80
|
+
:type subscription_id: str
|
|
81
|
+
:param resource_group_name: Name of the Azure resource group the project is in
|
|
82
|
+
:type resource_group_name: str
|
|
83
|
+
:param project_name: Name of the Azure project
|
|
84
|
+
:type project_name: str
|
|
85
|
+
|
|
86
|
+
.. admonition:: Example:
|
|
87
|
+
|
|
88
|
+
.. literalinclude:: ../samples/evaluation_samples_common.py
|
|
89
|
+
:start-after: [START create_azure_ai_project_object]
|
|
90
|
+
:end-before: [END create_azure_ai_project_object]
|
|
91
|
+
:language: python
|
|
92
|
+
:dedent: 8
|
|
93
|
+
:caption: Creating an AzureAIProject object.
|
|
94
|
+
|
|
95
|
+
"""
|
|
42
96
|
|
|
43
97
|
subscription_id: str
|
|
44
|
-
"""Azure subscription id of the project"""
|
|
45
98
|
resource_group_name: str
|
|
46
|
-
"""Azure resource group name of the project"""
|
|
47
99
|
project_name: str
|
|
48
|
-
"""Azure project name"""
|
|
49
100
|
|
|
50
101
|
|
|
51
102
|
class EvaluatorConfig(TypedDict, total=False):
|
azure/ai/evaluation/_version.py
CHANGED
|
@@ -3,10 +3,22 @@
|
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
5
|
from enum import Enum
|
|
6
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
6
7
|
|
|
7
8
|
|
|
9
|
+
@experimental
|
|
8
10
|
class AdversarialScenario(Enum):
|
|
9
|
-
"""Adversarial scenario types
|
|
11
|
+
"""Adversarial scenario types
|
|
12
|
+
|
|
13
|
+
.. admonition:: Example:
|
|
14
|
+
|
|
15
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
16
|
+
:start-after: [START adversarial_scenario]
|
|
17
|
+
:end-before: [END adversarial_scenario]
|
|
18
|
+
:language: python
|
|
19
|
+
:dedent: 8
|
|
20
|
+
:caption: Configure an AdversarialSimulator with an Adversarial Conversation scenario.
|
|
21
|
+
"""
|
|
10
22
|
|
|
11
23
|
ADVERSARIAL_QA = "adv_qa"
|
|
12
24
|
ADVERSARIAL_CONVERSATION = "adv_conversation"
|
|
@@ -18,12 +30,14 @@ class AdversarialScenario(Enum):
|
|
|
18
30
|
ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material"
|
|
19
31
|
|
|
20
32
|
|
|
33
|
+
@experimental
|
|
21
34
|
class AdversarialScenarioJailbreak(Enum):
|
|
22
35
|
"""Adversarial scenario types for XPIA Jailbreak"""
|
|
23
36
|
|
|
24
37
|
ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia"
|
|
25
38
|
|
|
26
39
|
|
|
40
|
+
@experimental
|
|
27
41
|
class _UnstableAdversarialScenario(Enum):
|
|
28
42
|
"""Adversarial scenario types that we haven't published, but still want available for internal use
|
|
29
43
|
Values listed here are subject to potential change, and/or migration to the main enum over time.
|
|
@@ -7,6 +7,7 @@ import asyncio
|
|
|
7
7
|
import logging
|
|
8
8
|
import random
|
|
9
9
|
from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
|
|
10
|
+
from itertools import zip_longest
|
|
10
11
|
|
|
11
12
|
from tqdm import tqdm
|
|
12
13
|
|
|
@@ -14,6 +15,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
14
15
|
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
15
16
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
16
17
|
from azure.ai.evaluation._http_utils import get_async_http_client
|
|
18
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
17
19
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
18
20
|
from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
|
|
19
21
|
from azure.core.credentials import TokenCredential
|
|
@@ -45,9 +47,19 @@ class AdversarialSimulator:
|
|
|
45
47
|
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
46
48
|
:param credential: The credential for connecting to Azure AI project.
|
|
47
49
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
50
|
+
|
|
51
|
+
.. admonition:: Example:
|
|
52
|
+
|
|
53
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
54
|
+
:start-after: [START adversarial_scenario]
|
|
55
|
+
:end-before: [END adversarial_scenario]
|
|
56
|
+
:language: python
|
|
57
|
+
:dedent: 8
|
|
58
|
+
:caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
|
|
59
|
+
2 conversation turns each (4 messages per result).
|
|
48
60
|
"""
|
|
49
61
|
|
|
50
|
-
def __init__(self, *, azure_ai_project:
|
|
62
|
+
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
51
63
|
"""Constructor."""
|
|
52
64
|
|
|
53
65
|
try:
|
|
@@ -149,28 +161,6 @@ class AdversarialSimulator:
|
|
|
149
161
|
|
|
150
162
|
The 'content' for 'assistant' role messages may include the messages that your callback returned.
|
|
151
163
|
:rtype: List[Dict[str, Any]]
|
|
152
|
-
|
|
153
|
-
**Output format**
|
|
154
|
-
|
|
155
|
-
.. code-block:: python
|
|
156
|
-
|
|
157
|
-
return_value = [
|
|
158
|
-
{
|
|
159
|
-
'template_parameters': {},
|
|
160
|
-
'messages': [
|
|
161
|
-
{
|
|
162
|
-
'content': '<jailbreak prompt> <adversarial query>',
|
|
163
|
-
'role': 'user'
|
|
164
|
-
},
|
|
165
|
-
{
|
|
166
|
-
'content': "<response from endpoint>",
|
|
167
|
-
'role': 'assistant',
|
|
168
|
-
'context': None
|
|
169
|
-
}
|
|
170
|
-
],
|
|
171
|
-
'$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
|
|
172
|
-
}
|
|
173
|
-
]
|
|
174
164
|
"""
|
|
175
165
|
|
|
176
166
|
# validate the inputs
|
|
@@ -215,17 +205,18 @@ class AdversarialSimulator:
|
|
|
215
205
|
ncols=100,
|
|
216
206
|
unit="simulations",
|
|
217
207
|
)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
208
|
+
|
|
209
|
+
if randomize_order:
|
|
210
|
+
# The template parameter lists are persistent across sim runs within a session,
|
|
211
|
+
# So randomize the selection instead of the parameter list directly,
|
|
212
|
+
# or a potentially large deep copy.
|
|
213
|
+
if randomization_seed is not None:
|
|
214
|
+
random.seed(randomization_seed)
|
|
215
|
+
random.shuffle(templates)
|
|
216
|
+
parameter_lists = [t.template_parameters for t in templates]
|
|
217
|
+
zipped_parameters = list(zip_longest(*parameter_lists))
|
|
218
|
+
for param_group in zipped_parameters:
|
|
219
|
+
for template, parameter in zip(templates, param_group):
|
|
229
220
|
if _jailbreak_type == "upia":
|
|
230
221
|
parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
|
|
231
222
|
tasks.append(
|
|
@@ -5,7 +5,17 @@ from enum import Enum
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class SupportedLanguages(Enum):
|
|
8
|
-
"""Supported languages for evaluation, using ISO standard language codes.
|
|
8
|
+
"""Supported languages for evaluation, using ISO standard language codes.
|
|
9
|
+
|
|
10
|
+
.. admonition:: Example:
|
|
11
|
+
|
|
12
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
13
|
+
:start-after: [START supported_languages]
|
|
14
|
+
:end-before: [END supported_languages]
|
|
15
|
+
:language: python
|
|
16
|
+
:dedent: 8
|
|
17
|
+
:caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
|
|
18
|
+
"""
|
|
9
19
|
|
|
10
20
|
Spanish = "es"
|
|
11
21
|
Italian = "it"
|
|
@@ -11,6 +11,7 @@ from azure.ai.evaluation._common._experimental import experimental
|
|
|
11
11
|
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
12
12
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
13
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
14
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
14
15
|
from azure.core.credentials import TokenCredential
|
|
15
16
|
|
|
16
17
|
from ._adversarial_simulator import AdversarialSimulator
|
|
@@ -30,9 +31,18 @@ class DirectAttackSimulator:
|
|
|
30
31
|
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
31
32
|
:param credential: The credential for connecting to Azure AI project.
|
|
32
33
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
34
|
+
|
|
35
|
+
.. admonition:: Example:
|
|
36
|
+
|
|
37
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
38
|
+
:start-after: [START direct_attack_simulator]
|
|
39
|
+
:end-before: [END direct_attack_simulator]
|
|
40
|
+
:language: python
|
|
41
|
+
:dedent: 8
|
|
42
|
+
:caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
|
|
33
43
|
"""
|
|
34
44
|
|
|
35
|
-
def __init__(self, *, azure_ai_project:
|
|
45
|
+
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
36
46
|
"""Constructor."""
|
|
37
47
|
|
|
38
48
|
try:
|
|
@@ -125,7 +135,7 @@ class DirectAttackSimulator:
|
|
|
125
135
|
- '**$schema**': A string indicating the schema URL for the conversation format.
|
|
126
136
|
|
|
127
137
|
The 'content' for 'assistant' role messages may include the messages that your callback returned.
|
|
128
|
-
:rtype: Dict[str, [List[Dict[str, Any]]]]
|
|
138
|
+
:rtype: Dict[str, List[Dict[str, Any]]]
|
|
129
139
|
|
|
130
140
|
**Output format**
|
|
131
141
|
|
|
@@ -178,9 +188,7 @@ class DirectAttackSimulator:
|
|
|
178
188
|
if not randomization_seed:
|
|
179
189
|
randomization_seed = randint(0, 1000000)
|
|
180
190
|
|
|
181
|
-
regular_sim = AdversarialSimulator(
|
|
182
|
-
azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential
|
|
183
|
-
)
|
|
191
|
+
regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
|
|
184
192
|
regular_sim_results = await regular_sim(
|
|
185
193
|
scenario=scenario,
|
|
186
194
|
target=target,
|
|
@@ -190,10 +198,10 @@ class DirectAttackSimulator:
|
|
|
190
198
|
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
191
199
|
api_call_delay_sec=api_call_delay_sec,
|
|
192
200
|
concurrent_async_task=concurrent_async_task,
|
|
193
|
-
randomize_order=
|
|
201
|
+
randomize_order=False,
|
|
194
202
|
randomization_seed=randomization_seed,
|
|
195
203
|
)
|
|
196
|
-
jb_sim = AdversarialSimulator(azure_ai_project=
|
|
204
|
+
jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
|
|
197
205
|
jb_sim_results = await jb_sim(
|
|
198
206
|
scenario=scenario,
|
|
199
207
|
target=target,
|
|
@@ -204,7 +212,7 @@ class DirectAttackSimulator:
|
|
|
204
212
|
api_call_delay_sec=api_call_delay_sec,
|
|
205
213
|
concurrent_async_task=concurrent_async_task,
|
|
206
214
|
_jailbreak_type="upia",
|
|
207
|
-
randomize_order=
|
|
215
|
+
randomize_order=False,
|
|
208
216
|
randomization_seed=randomization_seed,
|
|
209
217
|
)
|
|
210
218
|
return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
|
|
@@ -13,6 +13,7 @@ from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
|
13
13
|
from azure.ai.evaluation._common._experimental import experimental
|
|
14
14
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
15
|
from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
|
|
16
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
16
17
|
from azure.core.credentials import TokenCredential
|
|
17
18
|
|
|
18
19
|
from ._adversarial_simulator import AdversarialSimulator, JsonLineList
|
|
@@ -32,9 +33,18 @@ class IndirectAttackSimulator(AdversarialSimulator):
|
|
|
32
33
|
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
33
34
|
:param credential: The credential for connecting to Azure AI project.
|
|
34
35
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
36
|
+
|
|
37
|
+
.. admonition:: Example:
|
|
38
|
+
|
|
39
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
40
|
+
:start-after: [START indirect_attack_simulator]
|
|
41
|
+
:end-before: [END indirect_attack_simulator]
|
|
42
|
+
:language: python
|
|
43
|
+
:dedent: 8
|
|
44
|
+
:caption: Run the IndirectAttackSimulator to produce 1 result with 1 conversation turn (2 messages in the result).
|
|
35
45
|
"""
|
|
36
46
|
|
|
37
|
-
def __init__(self, *, azure_ai_project:
|
|
47
|
+
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
38
48
|
"""Constructor."""
|
|
39
49
|
|
|
40
50
|
try:
|
|
@@ -14,7 +14,9 @@ from typing import Optional, Union
|
|
|
14
14
|
from azure.core.credentials import AccessToken, TokenCredential
|
|
15
15
|
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
|
|
16
16
|
|
|
17
|
-
AZURE_TOKEN_REFRESH_INTERVAL =
|
|
17
|
+
AZURE_TOKEN_REFRESH_INTERVAL = int(
|
|
18
|
+
os.getenv("AZURE_TOKEN_REFRESH_INTERVAL", "600")
|
|
19
|
+
) # token refresh interval in seconds
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
class TokenScope(Enum):
|
|
@@ -74,14 +74,18 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
74
74
|
timeout=5,
|
|
75
75
|
)
|
|
76
76
|
if response.status_code != 200:
|
|
77
|
-
msg =
|
|
77
|
+
msg = (
|
|
78
|
+
f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
|
|
79
|
+
f"correctly, and make sure you have the necessary access permissions. "
|
|
80
|
+
f"Status code: {response.status_code}."
|
|
81
|
+
)
|
|
78
82
|
raise EvaluationException(
|
|
79
83
|
message=msg,
|
|
80
|
-
internal_message=msg,
|
|
81
84
|
target=ErrorTarget.RAI_CLIENT,
|
|
82
|
-
category=ErrorCategory.
|
|
83
|
-
blame=ErrorBlame.
|
|
85
|
+
category=ErrorCategory.PROJECT_ACCESS_ERROR,
|
|
86
|
+
blame=ErrorBlame.USER_ERROR,
|
|
84
87
|
)
|
|
88
|
+
|
|
85
89
|
base_url = urlparse(response.json()["properties"]["discoveryUrl"])
|
|
86
90
|
return f"{base_url.scheme}://{base_url.netloc}"
|
|
87
91
|
|