azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +1 -5
- azure/ai/evaluation/_common/rai_service.py +4 -4
- azure/ai/evaluation/_common/utils.py +19 -19
- azure/ai/evaluation/_constants.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
- azure/ai/evaluation/_evaluate/_evaluate.py +35 -28
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
- azure/ai/evaluation/_evaluate/_utils.py +29 -22
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
- azure/ai/evaluation/_evaluators/_chat/_chat.py +16 -9
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +4 -10
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -10
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +1 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +1 -1
- azure/ai/evaluation/_evaluators/_eci/_eci.py +2 -2
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +5 -10
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +5 -10
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +2 -2
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +2 -2
- azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +5 -10
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -10
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +1 -2
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
- azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
- azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
- azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
- azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +147 -80
- azure/ai/evaluation/simulator/_tracing.py +21 -24
- azure/ai/evaluation/simulator/_utils.py +4 -1
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/METADATA +86 -14
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/RECORD +58 -56
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0
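
The `{simulator.py → _simulator.py}` rename makes the module private. The matching one-line change in `simulator/__init__.py` suggests the class is still re-exported from the package namespace, so consumer code would import it from there rather than from the module; a sketch of the assumed import (an inference from this file list, not confirmed by the hunks below):

```python
# Assumed public import path after the rename; the re-export from
# simulator/__init__.py is an inference, not shown in this diff.
from azure.ai.evaluation.simulator import Simulator
```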
azure/ai/evaluation/simulator/_model_tools/_rai_client.py

@@ -2,15 +2,14 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import os
-from typing import Any
+from typing import Any
 from urllib.parse import urljoin, urlparse

-from azure.
-
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
-from azure.ai.evaluation._user_agent import USER_AGENT
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._user_agent import USER_AGENT
+from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode

 from ._identity_manager import APITokenManager

@@ -21,7 +20,7 @@ if "RAI_SVC_URL" in os.environ:
     print(f"Found RAI_SVC_URL in environment variable, using {api_url} for the service endpoint.")


-class RAIClient:
+class RAIClient:  # pylint: disable=client-accepts-api-version-keyword
     """Client for the Responsible AI Service

     :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
@@ -31,7 +30,9 @@ class RAIClient:
     :type token_manage: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
     """

-    def __init__(
+    def __init__(  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
+        self, azure_ai_project: AzureAIProject, token_manager: APITokenManager
+    ) -> None:
         self.azure_ai_project = azure_ai_project
         self.token_manager = token_manager

@@ -73,7 +74,7 @@ class RAIClient:
             timeout=5,
         )
         if response.status_code != 200:
-            msg =
+            msg = "Failed to retrieve the discovery service URL."
             raise EvaluationException(
                 message=msg,
                 internal_message=msg,
@@ -104,7 +105,11 @@ class RAIClient:
         return self.contentharm_parameters

     async def get_jailbreaks_dataset(self, type: str) -> Any:
-        "Get the jailbreaks dataset, if exists
+        """Get the jailbreaks dataset, if exists
+
+        :param type: The dataset type. Should be one of 'xpia' or 'upia'
+        :type type: str
+        """
         if self.jailbreaks_dataset is None:
             if type == "xpia":
                 self.jailbreaks_dataset = await self.get(self.xpia_jailbreaks_json_endpoint)
@@ -146,8 +151,10 @@ class RAIClient:
         if response.status_code == 200:
             return response.json()

-        msg =
-
+        msg = (
+            "Azure safety evaluation service is not available in your current region, "
+            + "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
+        )
         raise EvaluationException(
             message=msg,
             internal_message=msg,
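
`RAIClient` now imports `AsyncRetryPolicy` and `RetryMode` from azure-core. The hunks above do not show how the policy is configured, so the following is only a minimal azure-core sketch of the pattern, with illustrative retry values rather than the ones the client actually uses:

```python
# Illustrative azure-core retry policy; the counts and backoff are placeholders,
# not the values used by RAIClient.
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode

retry_policy = AsyncRetryPolicy(
    retry_total=3,               # how many times to retry a failed request
    retry_backoff_factor=1.0,    # delay factor between attempts
    retry_mode=RetryMode.Fixed,  # fixed rather than exponential backoff
)
```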
azure/ai/evaluation/simulator/_model_tools/_template_handler.py

@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

-from typing import
+from typing import Optional

 from azure.ai.evaluation._model_configurations import AzureAIProject

azure/ai/evaluation/simulator/_model_tools/models.py

@@ -12,10 +12,9 @@ from abc import ABC, abstractmethod
 from collections import deque
 from typing import Deque, Dict, List, Optional, Union
 from urllib.parse import urlparse
-import ast

+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget

 from ._identity_manager import APITokenManager

@@ -29,16 +28,15 @@ def get_model_class_from_url(endpoint_url: str):

     if endpoint_path.endswith("chat/completions"):
         return OpenAIChatCompletionsModel
-
+    if endpoint_path.endswith("completions"):
         return OpenAICompletionsModel
-
-
-
-
-
-
-
-    )
+    raise EvaluationException(
+        message=f"Unknown API type for endpoint {endpoint_url}",
+        internal_message="Unknown API type",
+        error_category=ErrorCategory.UNKNOWN_FIELD,
+        error_blame=ErrorBlame.USER_ERROR,
+        error_target=ErrorTarget.MODELS,
+    )


 # ===========================================================
azure/ai/evaluation/simulator/_prompty/__init__.py

File without changes
azure/ai/evaluation/simulator/{simulator.py → _simulator.py}

@@ -3,25 +3,27 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-import re
 import asyncio
+import importlib.resources as pkg_resources
 import json
 import os
-
+import re
 import warnings
-
-from tqdm import tqdm
+from typing import Any, Callable, Dict, List, Optional, Union

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
+from promptflow.core import AzureOpenAIModelConfiguration, Flow
+from tqdm import tqdm

 from .._user_agent import USER_AGENT
 from ._conversation.constants import ConversationRole
-from ._helpers import ConversationHistory, Turn
+from ._helpers import ConversationHistory, Turn, experimental
+
 # from ._tracing import monitor_task_simulator
 from ._utils import JsonLineChatProtocol


+@experimental
 class Simulator:
     """
     Simulator for generating synthetic conversations.
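
The new `experimental` helper comes from the added `_helpers/_experimental.py` (+157 lines), whose body is not part of this diff. Decorators of this kind usually just wrap the class or function so that first use emits a warning; a minimal sketch under that assumption (not the actual implementation):

```python
import functools
import warnings


def experimental(wrapped):
    """Illustrative only: warn callers that the decorated API may change."""
    if isinstance(wrapped, type):
        original_init = wrapped.__init__

        @functools.wraps(original_init)
        def init_with_warning(self, *args, **kwargs):
            warnings.warn(
                f"{wrapped.__name__} is experimental and may change in future releases.",
                UserWarning,
                stacklevel=2,
            )
            original_init(self, *args, **kwargs)

        wrapped.__init__ = init_with_warning
        return wrapped

    @functools.wraps(wrapped)
    def call_with_warning(*args, **kwargs):
        warnings.warn(
            f"{wrapped.__name__} is experimental and may change in future releases.",
            UserWarning,
            stacklevel=2,
        )
        return wrapped(*args, **kwargs)

    return call_with_warning
```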
@@ -48,6 +50,7 @@ class Simulator:
         Validates the azure_ai_project configuration to ensure all required keys are present and have non-None values.

         :param azure_ai_project: The Azure AI project configuration dictionary.
+        :type azure_ai_project: Dict[str, Any]
         :raises ValueError: If required keys are missing or any of the values are None.
         """
         required_keys = ["subscription_id", "resource_group_name", "project_name"]
@@ -60,7 +63,7 @@ class Simulator:
     async def __call__(
         self,
         *,
-        target:
+        target: Callable,
         max_conversation_turns: int = 5,
         tasks: List[Dict] = [],
         text: str = "",
@@ -77,7 +80,7 @@ class Simulator:
         Generates synthetic conversations based on provided parameters.

         :keyword target: The target function to call during the simulation.
-        :paramtype target:
+        :paramtype target: Callable
         :keyword max_conversation_turns: Maximum number of conversation turns for the simulation. Each turn consists of a user and an assistant message.
         :paramtype max_conversation_turns: int
         :keyword tasks: A list of user tasks, each represented as a list of strings. Text should be relevant for the tasks and facilitate the simulation. One example is to use text to provide context for the tasks.
@@ -172,7 +175,7 @@ class Simulator:
     async def _simulate_with_predefined_turns(
         self,
         *,
-        target:
+        target: Callable,
         max_conversation_turns: int,
         conversation_turns: List[List[str]],
         user_simulator_prompty: Optional[str],
@@ -183,19 +186,26 @@ class Simulator:
         """
         Simulates conversations using predefined conversation turns.

-        :
-        :
-        :
-        :
-        :
-        :
-        :
+        :keyword target: The target function to call during each turn of the simulation.
+        :paramtype target: Callable
+        :keyword max_conversation_turns: Maximum number of turns for the simulation.
+        :paramtype max_conversation_turns: int
+        :keyword conversation_turns: A list of predefined conversation turns.
+        :paramtype conversation_turns: List[List[str]]
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Optional[str]
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
+        :keyword api_call_delay_sec: Delay in seconds between API calls.
+        :paramtype api_call_delay_sec: float
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
         simulated_conversations = []
         progress_bar = tqdm(
-            total=int(len(conversation_turns) * (max_conversation_turns/2)),
+            total=int(len(conversation_turns) * (max_conversation_turns / 2)),
             desc="Simulating with predefined conversation turns: ",
             ncols=100,
             unit="messages",
@@ -213,7 +223,7 @@ class Simulator:
                 current_simulation.add_to_history(assistant_turn)
                 progress_bar.update(1)  # Update progress bar for both user and assistant turns

-            if current_simulation
+            if len(current_simulation) < max_conversation_turns:
                 await self._extend_conversation_with_simulator(
                     current_simulation=current_simulation,
                     max_conversation_turns=max_conversation_turns,
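
The truncated `if current_simulation ...` checks above were replaced with `len(current_simulation) < max_conversation_turns`, which requires `ConversationHistory` (reworked in `_helpers/_simulator_data_classes.py`) to support `len()`. A rough sketch of the interface implied by the calls in this file (`add_to_history`, `to_list`, `__len__`); the field names are assumptions:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class Turn:
    role: Any  # e.g. ConversationRole.USER / ConversationRole.ASSISTANT
    content: str


@dataclass
class ConversationHistory:
    history: List[Turn] = field(default_factory=list)

    def add_to_history(self, turn: Turn) -> None:
        self.history.append(turn)

    def to_list(self) -> List[Dict[str, str]]:
        return [{"role": str(t.role), "content": t.content} for t in self.history]

    def __len__(self) -> int:
        # Lets the simulator write `len(history) < max_conversation_turns`.
        return len(self.history)
```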
@@ -239,20 +249,28 @@ class Simulator:
         user_simulator_prompty_kwargs: Dict[str, Any],
         api_call_delay_sec: float,
         prompty_model_config: Dict[str, Any],
-        target:
+        target: Callable,
         progress_bar: tqdm,
     ):
         """
         Extends an ongoing conversation using a user simulator until the maximum number of turns is reached.

-        :
-        :
-        :
-        :
-        :
-        :
-        :
-        :
+        :keyword current_simulation: The current state of the conversation history.
+        :paramtype current_simulation: ConversationHistory,
+        :keyword max_conversation_turns: The maximum number of conversation turns.
+        :paramtype max_conversation_turns: int,
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Optional[str],
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any],
+        :keyword api_call_delay_sec: Delay in seconds between API calls.
+        :paramtype api_call_delay_sec: float,
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any],
+        :keyword target: The target function to call for responses.
+        :paramtype target: Callable,
+        :keyword progress_bar: Progress bar for tracking simulation progress.
+        :paramtype progress_bar: tqdm,
         """
         user_flow = self._load_user_simulation_flow(
             user_simulator_prompty=user_simulator_prompty,
@@ -260,7 +278,7 @@ class Simulator:
             user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
         )

-        while current_simulation
+        while len(current_simulation) < max_conversation_turns:
             user_response_content = user_flow(
                 task="Continue the conversation", conversation_history=current_simulation.to_list()
             )
@@ -276,20 +294,33 @@ class Simulator:
             progress_bar.update(1)

     def _load_user_simulation_flow(
-        self,
-
+        self,
+        *,
+        user_simulator_prompty: Union[str, os.PathLike],
+        prompty_model_config: Dict[str, Any],
+        user_simulator_prompty_kwargs: Dict[str, Any],
+    ) -> Flow:
         """
         Loads the flow for simulating user interactions.

-        :
-        :
-        :
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Union[str, os.PathLike]
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
         :return: The loaded flow for simulating user interactions.
+        :rtype: Flow
         """
         if not user_simulator_prompty:
-
-
-
+            package = "azure.ai.evaluation.simulator._prompty"
+            resource_name = "task_simulate.prompty"
+            try:
+                # Access the resource as a file path
+                with pkg_resources.path(package, resource_name) as prompty_path:
+                    return load_flow(source=str(prompty_path), model=prompty_model_config)
+            except FileNotFoundError as e:
+                raise f"Flow path for {resource_name} does not exist in package {package}." from e
         return load_flow(
             source=user_simulator_prompty,
             model=prompty_model_config,
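
`_load_user_simulation_flow` now resolves the default prompty bundled under `azure.ai.evaluation.simulator._prompty` via `importlib.resources` (aliased as `pkg_resources`). A standalone sketch of that lookup pattern; it raises a real exception where the shipped code raises an f-string, which would itself fail with `TypeError`:

```python
import importlib.resources as pkg_resources

PACKAGE = "azure.ai.evaluation.simulator._prompty"  # package name from the diff above
RESOURCE = "task_simulate.prompty"                  # resource name from the diff above


def load_packaged_prompty(loader):
    """Resolve a resource bundled in a package and hand its path to `loader`
    while the path is still valid (it may be a temporary extraction)."""
    try:
        with pkg_resources.path(PACKAGE, RESOURCE) as prompty_path:
            return loader(str(prompty_path))
    except FileNotFoundError as exc:
        # Raise a real exception type so the chained traceback survives.
        raise FileNotFoundError(
            f"{RESOURCE} does not exist in package {PACKAGE}."
        ) from exc
```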
@@ -300,19 +331,20 @@ class Simulator:
         """
         Parses the response from the prompty execution.

-        :
+        :keyword response: The raw response from the prompty.
+        :paramtype str: str
         :return: A dictionary representing the parsed response content.
         :rtype: Dict[str, Any]
         :raises ValueError: If the response cannot be parsed.
         """
         try:
-            if
-                response = response.replace(
-                response = response.replace(
-
+            if isinstance(response, str):
+                response = response.replace("\u2019", "'").replace("\u2018", "'")
+                response = response.replace("\u201C", '"').replace("\u201D", '"')
+
             # Replace None with null
-            response = response.replace(
-
+            response = response.replace("None", "null")
+
             # Escape unescaped single quotes inside string values
             def escape_single_quotes(match):
                 s = match.group(0)
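
The parsing hunk above normalizes curly quotes and Python's `None` literal before handing the text to `json.loads`. A condensed sketch of that repair step (the single-quote escaping that follows in the original is omitted here):

```python
import json
from typing import Any, Dict


def parse_model_json(response: str) -> Dict[str, Any]:
    """Best-effort repair of near-JSON text returned by a model."""
    if isinstance(response, str):
        # Replace curly quotes with their ASCII equivalents.
        response = response.replace("\u2019", "'").replace("\u2018", "'")
        response = response.replace("\u201C", '"').replace("\u201D", '"')
        # Replace Python's None literal with JSON null.
        response = response.replace("None", "null")
    return json.loads(response)
```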
@@ -347,11 +379,16 @@ class Simulator:
         """
         Generates query responses using the specified prompty configuration.

-        :
-        :
-        :
-        :
-        :
+        :keyword text: The input text for generating queries.
+        :paramtype text: str
+        :keyword num_queries: The number of queries to generate.
+        :paramtype num_queries: int
+        :keyword query_response_generating_prompty: Path to the query response generating prompty file.
+        :paramtype query_response_generating_prompty: Optional[str]
+        :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
+        :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
         :return: A list of query-response dictionaries.
         :rtype: List[Dict[str, str]]
         :raises RuntimeError: If an error occurs during query generation.
@@ -364,7 +401,7 @@ class Simulator:

         try:
             query_responses = query_flow(text=text, num_queries=num_queries)
-            if
+            if isinstance(query_responses, dict):
                 keys = list(query_responses.keys())
                 return query_responses[keys[0]]
             return json.loads(query_responses)
@@ -372,20 +409,33 @@ class Simulator:
             raise RuntimeError("Error generating query responses") from e

     def _load_query_generation_flow(
-        self,
-
+        self,
+        *,
+        query_response_generating_prompty: Union[str, os.PathLike],
+        prompty_model_config: Dict[str, Any],
+        query_response_generating_prompty_kwargs: Dict[str, Any],
+    ) -> Flow:
         """
         Loads the flow for generating query responses.

-        :
-        :
-        :
+        :keyword query_response_generating_prompty: Path to the query response generating prompty file.
+        :paramtype query_response_generating_prompty: Union[str, os.PathLike]
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
+        :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
+        :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
         :return: The loaded flow for generating query responses.
+        :rtype: Flow
         """
         if not query_response_generating_prompty:
-
-
-
+            package = "azure.ai.evaluation.simulator._prompty"
+            resource_name = "task_query_response.prompty"
+            try:
+                # Access the resource as a file path
+                with pkg_resources.path(package, resource_name) as prompty_path:
+                    return load_flow(source=str(prompty_path), model=prompty_model_config)
+            except FileNotFoundError as e:
+                raise f"Flow path for {resource_name} does not exist in package {package}." from e
         return load_flow(
             source=query_response_generating_prompty,
             model=prompty_model_config,
@@ -400,26 +450,33 @@ class Simulator:
         tasks: List[Dict],
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
-        target:
+        target: Callable,
         api_call_delay_sec: float,
     ) -> List[JsonLineChatProtocol]:
         """
         Creates full conversations from query-response pairs.

-        :
-        :
-        :
-        :
-        :
-        :
-        :
+        :keyword query_responses: A list of query-response pairs.
+        :paramtype query_responses: List[Dict[str, str]]
+        :keyword max_conversation_turns: The maximum number of conversation turns.
+        :paramtype max_conversation_turns: int
+        :keyword tasks: A list of tasks for the simulation.
+        :paramtype tasks: List[Dict]
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Optional[str]
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
+        :keyword target: The target function to call for responses.
+        :paramtype target: Callable
+        :keyword api_call_delay_sec: Delay in seconds between API calls.
+        :paramtype api_call_delay_sec: float
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
         total_turns = len(query_responses) * max_conversation_turns

         progress_bar = tqdm(
-            total=int(total_turns/2),
+            total=int(total_turns / 2),
             desc="Generating: ",
             ncols=100,
             unit="message",
@@ -466,7 +523,7 @@ class Simulator:
         task: str,
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
-        target:
+        target: Callable,
         api_call_delay_sec: float,
         progress_bar: tqdm,
     ) -> List[Dict[str, str]]:
@@ -484,7 +541,7 @@ class Simulator:
         :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
         :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
         :keyword target: The target function to call for responses.
-        :paramtype target:
+        :paramtype target: Callable
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
         :keyword progress_bar: Progress bar for tracking simulation progress.
@@ -496,20 +553,23 @@ class Simulator:
         # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
         # conversation_history.add_to_history(user_turn)

-        while conversation_history
+        while len(conversation_history) < max_conversation_turns:
             user_flow = self._load_user_simulation_flow(
                 user_simulator_prompty=user_simulator_prompty,
                 prompty_model_config=self._build_prompty_model_config(),
                 user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
             )
             conversation_starter_from_simulated_user = user_flow(
-                task=task,
-
-
-
-
+                task=task,
+                conversation_history=[
+                    {
+                        "role": "assistant",
+                        "content": conversation_starter,
+                        "your_task": "Act as the user and translate the content into a user query.",
+                    }
+                ],
             )
-            if
+            if isinstance(conversation_starter_from_simulated_user, dict):
                 conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
             user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
             conversation_history.add_to_history(user_turn)
@@ -520,7 +580,7 @@ class Simulator:
             conversation_history.add_to_history(assistant_turn)
             progress_bar.update(1)

-            if conversation_history
+            if len(conversation_history) >= max_conversation_turns:
                 break

         return conversation_history.to_list()
@@ -536,9 +596,13 @@ class Simulator:
         Builds a response from the user simulator based on the current conversation history.

         :param task: A string representing the task details.
+        :type task: str
         :param conversation_history: The current conversation history as a list of dictionaries.
+        :type conversation_history: List[Dict[str, Any]]
         :param user_simulator_prompty: Path to the user simulator prompty file.
+        :type user_simulator_prompty: Optional[str]
         :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :type user_simulator_prompty_kwargs: Dict[str, Any]
         :return: The generated response content from the user simulator.
         :rtype: str
         :raises RuntimeError: If an error occurs during response generation.
@@ -557,14 +621,17 @@ class Simulator:
             raise RuntimeError("Error building user simulation response") from e

     async def _get_target_response(
-        self, *, target:
+        self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
     ) -> str:
         """
         Retrieves the response from the target callback based on the current conversation history.

-        :
-        :
-        :
+        :keyword target: The target function to call for a response.
+        :paramtype target: Callable
+        :keyword api_call_delay_sec: Delay in seconds before retrieving the response.
+        :paramtype api_call_delay_sec: float
+        :keyword conversation_history: The current conversation history.
+        :paramtype conversation_history: ConversationHistory
         :return: The content of the response from the target.
         :rtype: str
         """
azure/ai/evaluation/simulator/_tracing.py

@@ -4,35 +4,37 @@
 # pylint: disable=C0103,C0114,C0116,E0401,E0611

 import functools
+from typing import Callable, TypeVar

 from promptflow._sdk._telemetry.activity import ActivityType, monitor_operation
+from typing_extensions import ParamSpec
+
+P = ParamSpec("P")
+R = TypeVar("R")


 def monitor_adversarial_scenario(activity_name: str = "adversarial.simulator.call"):
     """
     Monitor an adversarial scenario.

-
-    activity_name
+    :param activity_name: The name of the activity to monitor.
+    :type activity_name: str
+    :returns: A decorator
+    :rtype: Callable[[Callable], Callable]
     """

-    def decorator(func):
+    def decorator(func: Callable[P, R]) -> Callable[P, R]:
         """
         Decorator for monitoring an adversarial scenario.

-
-        func
+        :param func: The function to be decorated.
+        :type func: Callable[P, R]
+        :returns: The decorated function
+        :rtype: Callable[P, R]
         """

         @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            """
-            Wrapper for monitoring an adversarial scenario.
-
-            Parameters:
-            *args: Variable length argument list.
-            **kwargs: Arbitrary keyword arguments.
-            """
+        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
             scenario = str(kwargs.get("scenario", None))
             max_conversation_turns = kwargs.get("max_conversation_turns", None)
             max_simulation_results = kwargs.get("max_simulation_results", None)
@@ -55,23 +57,18 @@ def monitor_adversarial_scenario(activity_name: str = "adversarial.simulator.call"):
     return decorator


-def monitor_task_simulator(func):
+def monitor_task_simulator(func: Callable[P, R]) -> Callable[P, R]:
     """
     Monitor a task simulator.

-
-    func
+    :param func: The function to be decorated.
+    :type func: Callable[P, R]
+    :returns: The decorated function
+    :rtype: Callable[P, R]
     """

     @functools.wraps(func)
-    def wrapper(*args, **kwargs):
-        """
-        Wrapper for monitoring a task simulator.
-
-        Parameters:
-        *args: Variable length argument list.
-        **kwargs: Arbitrary keyword arguments.
-        """
+    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
         text_length = len(kwargs.get("text", ""))
         user_persona_length = len(kwargs.get("user_persona", []))
         num_queries = kwargs.get("num_queries", 0)
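
The `ParamSpec`/`TypeVar` pair added at the top of `_tracing.py` lets the typed wrappers preserve the decorated function's exact signature for type checkers. A standalone sketch of the pattern, with illustrative timing logic in place of the real telemetry forwarding:

```python
import functools
import time
from typing import Callable, TypeVar

from typing_extensions import ParamSpec

P = ParamSpec("P")
R = TypeVar("R")


def timed(func: Callable[P, R]) -> Callable[P, R]:
    """Decorator whose wrapper keeps the original signature for type checkers."""

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            print(f"{func.__name__} took {time.perf_counter() - start:.3f}s")

    return wrapper
```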
azure/ai/evaluation/simulator/_utils.py

@@ -80,10 +80,13 @@ class JsonLineChatProtocol(dict):
         """
         return json.dumps(self)

-    def to_eval_qa_json_lines(self):
+    def to_eval_qa_json_lines(self) -> str:
         """
         Converts the object to a string of JSON lines suitable for evaluation in a Q&A format.
         The object is expected to be a dictionary with 'messages' key.
+
+        :returns: A json lines document
+        :rtype: str
         """
         user_message = None
         assistant_message = None