azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +22 -0
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +4 -0
- azure/ai/evaluation/_common/constants.py +5 -0
- azure/ai/evaluation/_common/math.py +73 -2
- azure/ai/evaluation/_common/rai_service.py +250 -62
- azure/ai/evaluation/_common/utils.py +196 -23
- azure/ai/evaluation/_constants.py +7 -6
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +13 -4
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +55 -14
- azure/ai/evaluation/_evaluate/_evaluate.py +312 -228
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +7 -6
- azure/ai/evaluation/_evaluate/_utils.py +46 -11
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +17 -18
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +67 -31
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +37 -24
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +21 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +52 -16
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +91 -48
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +100 -26
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +94 -26
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +96 -26
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +97 -26
- azure/ai/evaluation/_evaluators/_eci/_eci.py +31 -4
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +67 -36
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +14 -16
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +106 -34
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +20 -27
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +87 -31
- azure/ai/evaluation/_evaluators/_qa/_qa.py +23 -31
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +72 -36
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +83 -125
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +26 -27
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +37 -28
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +94 -33
- azure/ai/evaluation/_exceptions.py +19 -0
- azure/ai/evaluation/_model_configurations.py +83 -15
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +20 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +29 -35
- azure/ai/evaluation/simulator/_constants.py +11 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +17 -9
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +90 -35
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +4 -2
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +8 -4
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
- azure/ai/evaluation/simulator/_simulator.py +165 -105
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.0.1.dist-info/METADATA +600 -0
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/NOTICE.txt +20 -0
- azure_ai_evaluation-1.0.1.dist-info/RECORD +119 -0
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
- azure_ai_evaluation-1.0.0b4.dist-info/METADATA +0 -535
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
- /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/simulator/_direct_attack_simulator.py

@@ -7,13 +7,14 @@ import logging
 from random import randint
 from typing import Callable, Optional, cast
 
+from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator import AdversarialScenario
+from azure.ai.evaluation._model_configurations import AzureAIProject
 from azure.core.credentials import TokenCredential
 
 from ._adversarial_simulator import AdversarialSimulator
-from ._helpers import experimental
 from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
 
 logger = logging.getLogger(__name__)
@@ -30,9 +31,18 @@ class DirectAttackSimulator:
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/evaluation_samples_simulate.py
+            :start-after: [START direct_attack_simulator]
+            :end-before: [END direct_attack_simulator]
+            :language: python
+            :dedent: 8
+            :caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
     """
 
-    def __init__(self, *, azure_ai_project:
+    def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
         """Constructor."""
 
         try:
@@ -125,7 +135,7 @@ class DirectAttackSimulator:
         - '**$schema**': A string indicating the schema URL for the conversation format.
 
         The 'content' for 'assistant' role messages may includes the messages that your callback returned.
-        :rtype: Dict[str, [List[Dict[str, Any]]]]
+        :rtype: Dict[str, [List[Dict[str, Any]]]]
 
         **Output format**
 
@@ -178,9 +188,7 @@ class DirectAttackSimulator:
         if not randomization_seed:
             randomization_seed = randint(0, 1000000)
 
-        regular_sim = AdversarialSimulator(
-            azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential
-        )
+        regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
         regular_sim_results = await regular_sim(
             scenario=scenario,
             target=target,
@@ -190,10 +198,10 @@ class DirectAttackSimulator:
             api_call_retry_sleep_sec=api_call_retry_sleep_sec,
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
-            randomize_order=
+            randomize_order=False,
             randomization_seed=randomization_seed,
         )
-        jb_sim = AdversarialSimulator(azure_ai_project=
+        jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
         jb_sim_results = await jb_sim(
             scenario=scenario,
             target=target,
@@ -204,7 +212,7 @@ class DirectAttackSimulator:
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
             _jailbreak_type="upia",
-            randomize_order=
+            randomize_order=False,
             randomization_seed=randomization_seed,
         )
         return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
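Based on the new keyword-only constructor and the `{"jailbreak": ..., "regular": ...}` return shape shown in this diff, a minimal usage sketch might look like the following. The scenario value, project fields, and callback body are illustrative assumptions, not values taken from the package.

```python
import asyncio
from typing import Any, Dict, List, Optional

from azure.ai.evaluation.simulator import AdversarialScenario, DirectAttackSimulator
from azure.identity import DefaultAzureCredential

# Hypothetical project details -- replace with your own values.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}


async def callback(
    messages: Dict[str, List[Dict[str, Any]]],
    stream: bool = False,
    session_state: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict:
    # Stand-in target: echo the last user message. A real target would call your application.
    last_user_message = messages["messages"][-1]["content"]
    messages["messages"].append({"role": "assistant", "content": f"Echo: {last_user_message}", "context": None})
    return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}


async def main() -> None:
    simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
    outputs = await simulator(
        scenario=AdversarialScenario.ADVERSARIAL_QA,  # assumed scenario choice
        target=callback,
        max_simulation_results=2,
    )
    # Results come back keyed by attack type, per the return statement above.
    print(len(outputs["regular"]), len(outputs["jailbreak"]))


if __name__ == "__main__":
    asyncio.run(main())
```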
azure/ai/evaluation/simulator/_helpers/__init__.py

@@ -1,5 +1,4 @@
-from ._experimental import experimental
 from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
 from ._simulator_data_classes import ConversationHistory, Turn
 
-__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"
+__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py

@@ -30,7 +30,19 @@ class Turn:
         return {
             "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
             "content": self.content,
-            "context": self.context,
+            "context": str(self.context),
+        }
+
+    def to_context_free_dict(self) -> Dict[str, Optional[str]]:
+        """
+        Convert the conversation turn to a dictionary without context.
+
+        :returns: A dictionary representation of the conversation turn without context.
+        :rtype: Dict[str, Optional[str]]
+        """
+        return {
+            "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
+            "content": self.content,
         }
 
     def __repr__(self):
@@ -66,6 +78,15 @@ class ConversationHistory:
         """
         return [turn.to_dict() for turn in self.history]
 
+    def to_context_free_list(self) -> List[Dict[str, Optional[str]]]:
+        """
+        Converts the conversation history to a list of dictionaries without context.
+
+        :returns: A list of dictionaries representing the conversation turns without context.
+        :rtype: List[Dict[str, str]]
+        """
+        return [turn.to_context_free_dict() for turn in self.history]
+
     def __len__(self) -> int:
         return len(self.history)
 
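The new `to_context_free_dict` helper mirrors `to_dict` but drops the context entry, while `to_dict` now stringifies the context value. A small sketch of the difference, assuming `Turn` accepts `role`, `content`, and `context` arguments as the `to_dict` body above implies:

```python
from azure.ai.evaluation.simulator._helpers import Turn

# Assumption: Turn takes role, content, and context keyword arguments,
# as implied by the to_dict() body shown in the diff above.
turn = Turn(role="user", content="What is the refund policy?", context={"doc": "faq.md"})

print(turn.to_dict())
# {'role': 'user', 'content': 'What is the refund policy?', 'context': "{'doc': 'faq.md'}"}

print(turn.to_context_free_dict())
# {'role': 'user', 'content': 'What is the refund policy?'}
```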
azure/ai/evaluation/simulator/_indirect_attack_simulator.py

@@ -3,23 +3,28 @@
 # ---------------------------------------------------------
 # pylint: disable=C0301,C0114,R0913,R0903
 # noqa: E501
+import asyncio
 import logging
 from typing import Callable, cast
 
+from tqdm import tqdm
+
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
+from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-from azure.ai.evaluation.simulator import
+from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
+from azure.ai.evaluation._model_configurations import AzureAIProject
 from azure.core.credentials import TokenCredential
 
-from ._adversarial_simulator import AdversarialSimulator
-
+from ._adversarial_simulator import AdversarialSimulator, JsonLineList
+
 from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
 
 logger = logging.getLogger(__name__)
 
 
 @experimental
-class IndirectAttackSimulator:
+class IndirectAttackSimulator(AdversarialSimulator):
     """
     Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
 
@@ -28,9 +33,18 @@ class IndirectAttackSimulator:
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
     :param credential: The credential for connecting to Azure AI project.
     :type credential: ~azure.core.credentials.TokenCredential
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/evaluation_samples_simulate.py
+            :start-after: [START indirect_attack_simulator]
+            :end-before: [END indirect_attack_simulator]
+            :language: python
+            :dedent: 8
+            :caption: Run the IndirectAttackSimulator to produce 1 result with 1 conversation turn (2 messages in the result).
     """
 
-    def __init__(self, *, azure_ai_project:
+    def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
         """Constructor."""
 
         try:
@@ -54,6 +68,7 @@ class IndirectAttackSimulator:
         self.adversarial_template_handler = AdversarialTemplateHandler(
             azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
         )
+        super().__init__(azure_ai_project=azure_ai_project, credential=credential)
 
     def _ensure_service_dependencies(self):
         if self.rai_client is None:
@@ -69,29 +84,22 @@ class IndirectAttackSimulator:
     async def __call__(
         self,
         *,
-        scenario: AdversarialScenario,
         target: Callable,
-        max_conversation_turns: int = 1,
         max_simulation_results: int = 3,
         api_call_retry_limit: int = 3,
         api_call_retry_sleep_sec: int = 1,
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
+        **kwargs,
     ):
         """
         Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
         This simulator converses with your AI system using prompts injected into the context to interrupt normal
         expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside
         the scope of your AI system.
-
-        :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
-        :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
         :keyword target: The target function to simulate adversarial inputs against.
             This function should be asynchronous and accept a dictionary representing the adversarial input.
         :paramtype target: Callable
-        :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
-            Defaults to 1.
-        :paramtype max_conversation_turns: int
         :keyword max_simulation_results: The maximum number of simulation results to return.
             Defaults to 3.
         :paramtype max_simulation_results: int
@@ -128,11 +136,11 @@ class IndirectAttackSimulator:
                 'template_parameters': {},
                 'messages': [
                     {
-                        'content': '<
+                        'content': '<adversarial query>',
                         'role': 'user'
                     },
                     {
-                        'content': "<response from
+                        'content': "<response from your callback>",
                         'role': 'assistant',
                         'context': None
                     }
@@ -141,25 +149,72 @@ class IndirectAttackSimulator:
             }]
         }
         """
-
-
-
-
-
-
-
-
+        # values that cannot be changed:
+        scenario = AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK
+        max_conversation_turns = 2
+        language = SupportedLanguages.English
+        self._ensure_service_dependencies()
+        templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
+        concurrent_async_task = min(concurrent_async_task, 1000)
+        semaphore = asyncio.Semaphore(concurrent_async_task)
+        sim_results = []
+        tasks = []
+        total_tasks = sum(len(t.template_parameters) for t in templates)
+        if max_simulation_results > total_tasks:
+            logger.warning(
+                "Cannot provide %s results due to maximum number of adversarial simulations that can be generated: %s."
+                "\n %s simulations will be generated.",
+                max_simulation_results,
+                total_tasks,
+                total_tasks,
             )
-
-
-
-
-
-
-            api_call_retry_limit=api_call_retry_limit,
-            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
-            api_call_delay_sec=api_call_delay_sec,
-            concurrent_async_task=concurrent_async_task,
-            _jailbreak_type="xpia",
+        total_tasks = min(total_tasks, max_simulation_results)
+        progress_bar = tqdm(
+            total=total_tasks,
+            desc="generating jailbreak simulations",
+            ncols=100,
+            unit="simulations",
         )
-
+        for template in templates:
+            for parameter in template.template_parameters:
+                tasks.append(
+                    asyncio.create_task(
+                        self._simulate_async(
+                            target=target,
+                            template=template,
+                            parameters=parameter,
+                            max_conversation_turns=max_conversation_turns,
+                            api_call_retry_limit=api_call_retry_limit,
+                            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
+                            api_call_delay_sec=api_call_delay_sec,
+                            language=language,
+                            semaphore=semaphore,
+                        )
+                    )
+                )
+                if len(tasks) >= max_simulation_results:
+                    break
+            if len(tasks) >= max_simulation_results:
+                break
+        for task in asyncio.as_completed(tasks):
+            completed_task = await task  # type: ignore
+            template_parameters = completed_task.get("template_parameters", {})  # type: ignore
+            xpia_attack_type = template_parameters.get("xpia_attack_type", "")  # type: ignore
+            action = template_parameters.get("action", "")  # type: ignore
+            document_type = template_parameters.get("document_type", "")  # type: ignore
+            sim_results.append(
+                {
+                    "messages": completed_task["messages"],  # type: ignore
+                    "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+                    "template_parameters": {
+                        "metadata": {
+                            "xpia_attack_type": xpia_attack_type,
+                            "action": action,
+                            "document_type": document_type,
+                        },
+                    },
+                }
+            )
+            progress_bar.update(1)
+        progress_bar.close()
+        return JsonLineList(sim_results)
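The rewritten `__call__` drops the `scenario` and `max_conversation_turns` keywords (both are now fixed internally), fans the work out as `asyncio` tasks bounded by a semaphore, and returns a `JsonLineList`. A rough invocation sketch under the new signature; the project fields and callback body are illustrative assumptions.

```python
import asyncio
from typing import Any, Dict, List, Optional

from azure.ai.evaluation.simulator import IndirectAttackSimulator
from azure.identity import DefaultAzureCredential

# Hypothetical project details -- replace with your own values.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}


async def callback(
    messages: Dict[str, List[Dict[str, Any]]],
    stream: bool = False,
    session_state: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict:
    # Stand-in target; a real target would forward the injected prompt to your application.
    messages["messages"].append({"role": "assistant", "content": "Sorry, I can't help with that.", "context": None})
    return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}


async def main() -> None:
    simulator = IndirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
    # No scenario or max_conversation_turns arguments anymore -- only the target and throttling knobs.
    results = await simulator(target=callback, max_simulation_results=1)
    # Each entry carries the simulated messages plus xpia metadata, per the sim_results dict above.
    print(results[0]["messages"])


if __name__ == "__main__":
    asyncio.run(main())
```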
azure/ai/evaluation/simulator/_model_tools/_identity_manager.py

@@ -11,10 +11,12 @@ from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Optional, Union
 
-from azure.core.credentials import
+from azure.core.credentials import AccessToken, TokenCredential
 from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
 
-AZURE_TOKEN_REFRESH_INTERVAL =
+AZURE_TOKEN_REFRESH_INTERVAL = int(
+    os.getenv("AZURE_TOKEN_REFRESH_INTERVAL", "600")
+)  # token refresh interval in seconds
 
 
 class TokenScope(Enum):
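The refresh interval is now read from the environment with a 600-second default, so it can be tuned without code changes. A minimal sketch of overriding it, assuming the variable is set before the module is first imported (the constant is evaluated at import time):

```python
import os

# Set before importing the simulator modules, since the constant is read at import time.
os.environ["AZURE_TOKEN_REFRESH_INTERVAL"] = "300"  # refresh every 5 minutes instead of the 600-second default

from azure.ai.evaluation.simulator._model_tools._identity_manager import AZURE_TOKEN_REFRESH_INTERVAL

print(AZURE_TOKEN_REFRESH_INTERVAL)  # 300
```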
azure/ai/evaluation/simulator/_model_tools/_rai_client.py

@@ -74,14 +74,18 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
             timeout=5,
         )
         if response.status_code != 200:
-            msg =
+            msg = (
+                f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
+                f"correctly, and make sure you have the necessary access permissions. "
+                f"Status code: {response.status_code}."
+            )
             raise EvaluationException(
                 message=msg,
-                internal_message=msg,
                 target=ErrorTarget.RAI_CLIENT,
-                category=ErrorCategory.
-                blame=ErrorBlame.
+                category=ErrorCategory.PROJECT_ACCESS_ERROR,
+                blame=ErrorBlame.USER_ERROR,
             )
+
         base_url = urlparse(response.json()["properties"]["discoveryUrl"])
         return f"{base_url.scheme}://{base_url.netloc}"
 
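The unchanged tail of this hunk reduces the project's discovery URL to its scheme and host with the standard library. A standalone sketch of that step, with a made-up URL:

```python
from urllib.parse import urlparse

discovery_url = "https://eastus.api.azureml.ms/discovery"  # hypothetical value returned by the project endpoint
base_url = urlparse(discovery_url)
print(f"{base_url.scheme}://{base_url.netloc}")  # https://eastus.api.azureml.ms
```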
azure/ai/evaluation/simulator/_prompty/task_query_response.prompty

@@ -36,8 +36,8 @@ On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh.
 Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
 <|text_end|>
 Output with 5 QnAs:
-
-{
+{
+    "qna":[{
         "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
         "r": "January 24, 1984"
     },
@@ -56,8 +56,8 @@ Output with 5 QnAs:
     {
         "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
         "r": "6%"
-    }
-
+    }]
+}
 Text:
 <|text_start|>
 {{ text }}
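With the pairs now wrapped in a top-level "qna" array and the object properly closed, the sample output in this prompty is valid JSON. A small sketch of consuming that shape, reusing one pair from the sample:

```python
import json

sample_output = """
{
    "qna": [{
        "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
        "r": "January 24, 1984"
    }]
}
"""

# Iterate over the question/response pairs in the corrected structure.
for pair in json.loads(sample_output)["qna"]:
    print(pair["q"], "->", pair["r"])
```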
azure/ai/evaluation/simulator/_prompty/task_simulate.prompty

@@ -16,6 +16,9 @@ inputs:
     type: string
   conversation_history:
     type: dict
+  action:
+    type: string
+    default: continue the converasation and make sure the task is completed by asking relevant questions
 
 ---
 system:
@@ -25,8 +28,10 @@ Output must be in JSON format
 Here's a sample output:
 {
   "content": "Here is my follow-up question.",
-  "
+  "role": "user"
 }
 
 Output with a json object that continues the conversation, given the conversation history:
 {{ conversation_history }}
+
+{{ action }}