azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of azure-ai-evaluation has been flagged as a potentially problematic release.
- azure/ai/evaluation/__init__.py +4 -4
- azure/ai/evaluation/_common/rai_service.py +4 -4
- azure/ai/evaluation/_common/utils.py +40 -25
- azure/ai/evaluation/_constants.py +13 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
- azure/ai/evaluation/_evaluate/_evaluate.py +88 -63
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
- azure/ai/evaluation/_evaluate/_utils.py +29 -22
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +34 -86
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
- azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +29 -79
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +33 -85
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
- azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +34 -88
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
- azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +17 -29
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -18
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +16 -91
- azure/ai/evaluation/_exceptions.py +0 -1
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_model_configurations.py +36 -8
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
- azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
- azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
- azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
- azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
- azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +166 -88
- azure/ai/evaluation/simulator/_tracing.py +21 -24
- azure/ai/evaluation/simulator/_utils.py +4 -1
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/METADATA +144 -14
- azure_ai_evaluation-1.0.0b3.dist-info/RECORD +98 -0
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -350
- azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -66
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure_ai_evaluation-1.0.0b1.dist-info/RECORD +0 -97
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt +0 -0
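Most of the moves above are internal renames (for example simulator/simulator.py becomes simulator/_simulator.py, and the _chat retrieval evaluator moves to _retrieval). A minimal sketch of how a consumer would reach the renamed simulator, assuming the one-line change to azure/ai/evaluation/simulator/__init__.py re-exports the class (that re-export is not shown in this diff):

# Hedged sketch: assumes azure.ai.evaluation.simulator re-exports Simulator from
# the renamed _simulator module; the class is decorated @experimental in 1.0.0b3.
from azure.ai.evaluation.simulator import Simulator

# Keys required by Simulator._validate_project_config per the diff below.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

simulator = Simulator(azure_ai_project=azure_ai_project, credential=None)  # credential handling is an assumption

The hunks below walk through the more substantive of these changes.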
@@ -8,11 +8,10 @@ import time
 import uuid
 from typing import Dict, List
 
-from azure.core.exceptions import HttpResponseError
-from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
-
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
 from azure.ai.evaluation._user_agent import USER_AGENT
+from azure.core.exceptions import HttpResponseError
+from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
 
 from .models import OpenAIChatCompletionsModel
 
@@ -2,15 +2,14 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import os
-from typing import Any
+from typing import Any
 from urllib.parse import urljoin, urlparse
 
-from azure.
-
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
-from azure.ai.evaluation._user_agent import USER_AGENT
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._user_agent import USER_AGENT
+from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
 
 from ._identity_manager import APITokenManager
 
@@ -21,7 +20,7 @@ if "RAI_SVC_URL" in os.environ:
     print(f"Found RAI_SVC_URL in environment variable, using {api_url} for the service endpoint.")
 
 
-class RAIClient:
+class RAIClient: # pylint: disable=client-accepts-api-version-keyword
     """Client for the Responsible AI Service
 
     :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
@@ -31,7 +30,9 @@ class RAIClient:
     :type token_manage: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
     """
 
-    def __init__(
+    def __init__( # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
+        self, azure_ai_project: AzureAIProject, token_manager: APITokenManager
+    ) -> None:
         self.azure_ai_project = azure_ai_project
         self.token_manager = token_manager
 
@@ -73,7 +74,7 @@ class RAIClient:
             timeout=5,
         )
         if response.status_code != 200:
-            msg =
+            msg = "Failed to retrieve the discovery service URL."
             raise EvaluationException(
                 message=msg,
                 internal_message=msg,
@@ -104,7 +105,11 @@ class RAIClient:
         return self.contentharm_parameters
 
     async def get_jailbreaks_dataset(self, type: str) -> Any:
-        "Get the jailbreaks dataset, if exists
+        """Get the jailbreaks dataset, if exists
+
+        :param type: The dataset type. Should be one of 'xpia' or 'upia'
+        :type type: str
+        """
         if self.jailbreaks_dataset is None:
             if type == "xpia":
                 self.jailbreaks_dataset = await self.get(self.xpia_jailbreaks_json_endpoint)
@@ -146,8 +151,10 @@ class RAIClient:
         if response.status_code == 200:
             return response.json()
 
-        msg =
-
+        msg = (
+            "Azure safety evaluation service is not available in your current region, "
+            + "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
+        )
         raise EvaluationException(
             message=msg,
             internal_message=msg,
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
-from typing import
+from typing import Optional
 
 from azure.ai.evaluation._model_configurations import AzureAIProject
 
@@ -12,10 +12,9 @@ from abc import ABC, abstractmethod
 from collections import deque
 from typing import Deque, Dict, List, Optional, Union
 from urllib.parse import urlparse
-import ast
 
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 
 from ._identity_manager import APITokenManager
 
@@ -29,16 +28,15 @@ def get_model_class_from_url(endpoint_url: str):
 
     if endpoint_path.endswith("chat/completions"):
         return OpenAIChatCompletionsModel
-
+    if endpoint_path.endswith("completions"):
         return OpenAICompletionsModel
-
-
-
-
-
-
-
-    )
+    raise EvaluationException(
+        message=f"Unknown API type for endpoint {endpoint_url}",
+        internal_message="Unknown API type",
+        error_category=ErrorCategory.UNKNOWN_FIELD,
+        error_blame=ErrorBlame.USER_ERROR,
+        error_target=ErrorTarget.MODELS,
+    )
 
 
 # ===========================================================
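The second hunk above replaces a partially written error path in get_model_class_from_url with a structured EvaluationException. A standalone sketch of the same endpoint-suffix dispatch rule, with the SDK-specific exception and model classes simplified away:

from urllib.parse import urlparse

# Standalone sketch of the selection rule shown above; not the SDK's actual helper.
def pick_model_kind(endpoint_url: str) -> str:
    endpoint_path = urlparse(endpoint_url).path.rstrip("/")
    if endpoint_path.endswith("chat/completions"):
        return "chat-completions"
    if endpoint_path.endswith("completions"):
        return "completions"
    raise ValueError(f"Unknown API type for endpoint {endpoint_url}")

assert pick_model_kind("https://example.openai.azure.com/openai/deployments/d/chat/completions") == "chat-completions"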
@@ -33,7 +33,8 @@ Answer must not be more than 5 words
 Answer must be picked from Text as is
 Question should be as descriptive as possible and must include as much context as possible from Text
 Output must always have the provided number of QnAs
-Output must be in JSON format
+Output must be in JSON format.
+Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
 Text:
 <|text_start|>
 On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
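The prompty change tightens the contract to exactly {{num_queries}} generated items. A hedged illustration of checking that count on a parsed response; the "q"/"r" key names are illustrative only, since the prompty's output schema is not shown in this hunk:

import json

# Hypothetical conformance check mirroring the tightened instruction above.
num_queries = 2
raw = '[{"q": "Who introduced the first Macintosh?", "r": "Steve Jobs"}, {"q": "When was it introduced?", "r": "January 24, 1984"}]'
items = json.loads(raw)
assert len(items) == num_queries, "Any other count is unacceptable per the prompt"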
@@ -3,25 +3,27 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-import re
 import asyncio
+import importlib.resources as pkg_resources
 import json
 import os
-
+import re
 import warnings
-
-from tqdm import tqdm
+from typing import Any, Callable, Dict, List, Optional, Union
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
+from promptflow.core import AzureOpenAIModelConfiguration, Flow
+from tqdm import tqdm
 
 from .._user_agent import USER_AGENT
 from ._conversation.constants import ConversationRole
-from ._helpers import ConversationHistory, Turn
+from ._helpers import ConversationHistory, Turn, experimental
+
 # from ._tracing import monitor_task_simulator
 from ._utils import JsonLineChatProtocol
 
 
+@experimental
 class Simulator:
     """
     Simulator for generating synthetic conversations.
@@ -39,7 +41,7 @@ class Simulator:
         """
         self._validate_project_config(azure_ai_project)
         self.azure_ai_project = azure_ai_project
-        self.azure_ai_project["api_version"] = "2024-
+        self.azure_ai_project["api_version"] = "2024-06-01"
         self.credential = credential
 
     @staticmethod
@@ -48,6 +50,7 @@ class Simulator:
         Validates the azure_ai_project configuration to ensure all required keys are present and have non-None values.
 
         :param azure_ai_project: The Azure AI project configuration dictionary.
+        :type azure_ai_project: Dict[str, Any]
         :raises ValueError: If required keys are missing or any of the values are None.
         """
         required_keys = ["subscription_id", "resource_group_name", "project_name"]
@@ -60,7 +63,7 @@ class Simulator:
     async def __call__(
         self,
         *,
-        target:
+        target: Callable,
         max_conversation_turns: int = 5,
         tasks: List[Dict] = [],
         text: str = "",
@@ -77,7 +80,7 @@ class Simulator:
         Generates synthetic conversations based on provided parameters.
 
         :keyword target: The target function to call during the simulation.
-        :paramtype target:
+        :paramtype target: Callable
         :keyword max_conversation_turns: Maximum number of conversation turns for the simulation. Each turn consists of a user and an assistant message.
         :paramtype max_conversation_turns: int
         :keyword tasks: A list of user tasks, each represented as a list of strings. Text should be relevant for the tasks and facilitate the simulation. One example is to use text to provide context for the tasks.
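Taken together, the __call__ hunks above pin the target parameter to Callable. A hedged usage sketch built only from the keyword arguments visible in this diff; the exact shape expected of the target callback is not shown here, so the callback below is an assumption:

import asyncio

async def my_target(messages, **kwargs):
    # Assumed callback shape: append an assistant reply to the incoming messages payload.
    last_user = messages["messages"][-1]["content"]
    messages["messages"].append({"role": "assistant", "content": f"You said: {last_user}"})
    return messages

async def main(simulator):
    # `simulator` is an instance of the Simulator class shown in this diff.
    outputs = await simulator(
        target=my_target,
        max_conversation_turns=4,  # each turn is a user plus an assistant message
        text="On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh.",
    )
    for conversation in outputs:  # JsonLineChatProtocol records
        print(conversation)

# asyncio.run(main(simulator))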
@@ -126,7 +129,6 @@ class Simulator:
         max_conversation_turns *= 2 # account for both user and assistant turns
 
         prompty_model_config = self._build_prompty_model_config()
-
         if conversation_turns:
             return await self._simulate_with_predefined_turns(
                 target=target,
@@ -172,7 +174,7 @@ class Simulator:
     async def _simulate_with_predefined_turns(
         self,
         *,
-        target:
+        target: Callable,
         max_conversation_turns: int,
         conversation_turns: List[List[str]],
         user_simulator_prompty: Optional[str],
@@ -183,19 +185,26 @@ class Simulator:
         """
         Simulates conversations using predefined conversation turns.
 
-        :
-        :
-        :
-        :
-        :
-        :
-        :
+        :keyword target: The target function to call during each turn of the simulation.
+        :paramtype target: Callable
+        :keyword max_conversation_turns: Maximum number of turns for the simulation.
+        :paramtype max_conversation_turns: int
+        :keyword conversation_turns: A list of predefined conversation turns.
+        :paramtype conversation_turns: List[List[str]]
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Optional[str]
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
+        :keyword api_call_delay_sec: Delay in seconds between API calls.
+        :paramtype api_call_delay_sec: float
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
         simulated_conversations = []
         progress_bar = tqdm(
-            total=int(len(conversation_turns) * (max_conversation_turns/2)),
+            total=int(len(conversation_turns) * (max_conversation_turns / 2)),
             desc="Simulating with predefined conversation turns: ",
             ncols=100,
             unit="messages",
@@ -213,7 +222,7 @@ class Simulator:
                 current_simulation.add_to_history(assistant_turn)
                 progress_bar.update(1) # Update progress bar for both user and assistant turns
 
-            if current_simulation
+            if len(current_simulation) < max_conversation_turns:
                 await self._extend_conversation_with_simulator(
                     current_simulation=current_simulation,
                     max_conversation_turns=max_conversation_turns,
@@ -224,8 +233,16 @@ class Simulator:
                     target=target,
                     progress_bar=progress_bar,
                 )
-
-
+            simulated_conversations.append(
+                JsonLineChatProtocol(
+                    {
+                        "messages": current_simulation.to_list(),
+                        "finish_reason": ["stop"],
+                        "context": {},
+                        "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+                    }
+                )
+            )
 
         progress_bar.close()
         return simulated_conversations
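Each completed simulation is now wrapped in a JsonLineChatProtocol record. An illustrative serialized line using the exact keys from the hunk above (the message contents are made up):

import json

# Illustrative shape of one simulated conversation record.
record = {
    "messages": [
        {"role": "user", "content": "Tell me about the first Macintosh."},
        {"role": "assistant", "content": "It was introduced on January 24, 1984."},
    ],
    "finish_reason": ["stop"],
    "context": {},
    "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
}
print(json.dumps(record))  # one JSON line per conversation, JSONL-style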
@@ -239,20 +256,28 @@ class Simulator:
         user_simulator_prompty_kwargs: Dict[str, Any],
         api_call_delay_sec: float,
         prompty_model_config: Dict[str, Any],
-        target:
+        target: Callable,
         progress_bar: tqdm,
     ):
         """
         Extends an ongoing conversation using a user simulator until the maximum number of turns is reached.
 
-        :
-        :
-        :
-        :
-        :
-        :
-        :
-        :
+        :keyword current_simulation: The current state of the conversation history.
+        :paramtype current_simulation: ConversationHistory,
+        :keyword max_conversation_turns: The maximum number of conversation turns.
+        :paramtype max_conversation_turns: int,
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Optional[str],
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any],
+        :keyword api_call_delay_sec: Delay in seconds between API calls.
+        :paramtype api_call_delay_sec: float,
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any],
+        :keyword target: The target function to call for responses.
+        :paramtype target: Callable,
+        :keyword progress_bar: Progress bar for tracking simulation progress.
+        :paramtype progress_bar: tqdm,
         """
         user_flow = self._load_user_simulation_flow(
             user_simulator_prompty=user_simulator_prompty,
@@ -260,9 +285,11 @@ class Simulator:
             user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
         )
 
-        while current_simulation
+        while len(current_simulation) < max_conversation_turns:
             user_response_content = user_flow(
-                task="Continue the conversation",
+                task="Continue the conversation",
+                conversation_history=current_simulation.to_list(),
+                **user_simulator_prompty_kwargs,
             )
             user_response = self._parse_prompty_response(response=user_response_content)
             user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
@@ -276,20 +303,34 @@ class Simulator:
             progress_bar.update(1)
 
     def _load_user_simulation_flow(
-        self,
-
+        self,
+        *,
+        user_simulator_prompty: Union[str, os.PathLike],
+        prompty_model_config: Dict[str, Any],
+        user_simulator_prompty_kwargs: Dict[str, Any],
+    ) -> Flow:
         """
         Loads the flow for simulating user interactions.
 
-        :
-        :
-        :
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Union[str, os.PathLike]
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
         :return: The loaded flow for simulating user interactions.
+        :rtype: Flow
         """
         if not user_simulator_prompty:
-
-
-
+            package = "azure.ai.evaluation.simulator._prompty"
+            resource_name = "task_simulate.prompty"
+            try:
+                # Access the resource as a file path
+                # pylint: disable=deprecated-method
+                with pkg_resources.path(package, resource_name) as prompty_path:
+                    return load_flow(source=str(prompty_path), model=prompty_model_config)
+            except FileNotFoundError as e:
+                raise f"Flow path for {resource_name} does not exist in package {package}." from e
         return load_flow(
             source=user_simulator_prompty,
             model=prompty_model_config,
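When no custom prompty is supplied, the bundled flow is resolved via the deprecated importlib.resources.path API (hence the pylint disable in the hunk above). A minimal standalone sketch of that pattern against a stdlib package so it runs anywhere; the SDK's actual package is azure.ai.evaluation.simulator._prompty:

import importlib.resources as pkg_resources

package = "email"             # stand-in package known to ship with CPython
resource_name = "charset.py"  # stand-in resource inside that package

# pkg_resources.path() is the deprecated-but-working API used above;
# importlib.resources.files(package) / resource_name is the modern equivalent.
with pkg_resources.path(package, resource_name) as resource_path:
    print(resource_path)      # a concrete filesystem path while the context is open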
@@ -300,19 +341,20 @@ class Simulator:
         """
         Parses the response from the prompty execution.
 
-        :
+        :keyword response: The raw response from the prompty.
+        :paramtype str: str
         :return: A dictionary representing the parsed response content.
         :rtype: Dict[str, Any]
         :raises ValueError: If the response cannot be parsed.
         """
         try:
-            if
-            response = response.replace(
-            response = response.replace(
-
+            if isinstance(response, str):
+                response = response.replace("\u2019", "'").replace("\u2018", "'")
+                response = response.replace("\u201C", '"').replace("\u201D", '"')
+
             # Replace None with null
-            response = response.replace(
-
+            response = response.replace("None", "null")
+
             # Escape unescaped single quotes inside string values
             def escape_single_quotes(match):
                 s = match.group(0)
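_parse_prompty_response now normalizes smart quotes and Python-style None before JSON parsing. A condensed sketch of that normalization; the SDK's regex-based single-quote escaping (escape_single_quotes) is not reproduced here:

import json

# Condensed sketch of the normalization applied above before JSON parsing.
def normalize_prompty_output(response: str) -> dict:
    response = response.replace("\u2019", "'").replace("\u2018", "'")   # curly -> straight apostrophes
    response = response.replace("\u201C", '"').replace("\u201D", '"')   # curly -> straight double quotes
    response = response.replace("None", "null")                         # Python literal -> JSON null
    return json.loads(response)

print(normalize_prompty_output('{"content": "It\u2019s a test", "extra": null}'))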
@@ -347,11 +389,16 @@ class Simulator:
         """
         Generates query responses using the specified prompty configuration.
 
-        :
-        :
-        :
-        :
-        :
+        :keyword text: The input text for generating queries.
+        :paramtype text: str
+        :keyword num_queries: The number of queries to generate.
+        :paramtype num_queries: int
+        :keyword query_response_generating_prompty: Path to the query response generating prompty file.
+        :paramtype query_response_generating_prompty: Optional[str]
+        :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
+        :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
         :return: A list of query-response dictionaries.
         :rtype: List[Dict[str, str]]
         :raises RuntimeError: If an error occurs during query generation.
@@ -361,10 +408,9 @@ class Simulator:
             prompty_model_config=prompty_model_config,
             query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
         )
-
         try:
             query_responses = query_flow(text=text, num_queries=num_queries)
-            if
+            if isinstance(query_responses, dict):
                 keys = list(query_responses.keys())
                 return query_responses[keys[0]]
             return json.loads(query_responses)
@@ -372,20 +418,34 @@ class Simulator:
             raise RuntimeError("Error generating query responses") from e
 
     def _load_query_generation_flow(
-        self,
-
+        self,
+        *,
+        query_response_generating_prompty: Union[str, os.PathLike],
+        prompty_model_config: Dict[str, Any],
+        query_response_generating_prompty_kwargs: Dict[str, Any],
+    ) -> Flow:
         """
         Loads the flow for generating query responses.
 
-        :
-        :
-        :
+        :keyword query_response_generating_prompty: Path to the query response generating prompty file.
+        :paramtype query_response_generating_prompty: Union[str, os.PathLike]
+        :keyword prompty_model_config: The configuration for the prompty model.
+        :paramtype prompty_model_config: Dict[str, Any]
+        :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
+        :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
         :return: The loaded flow for generating query responses.
+        :rtype: Flow
         """
         if not query_response_generating_prompty:
-
-
-
+            package = "azure.ai.evaluation.simulator._prompty"
+            resource_name = "task_query_response.prompty"
+            try:
+                # Access the resource as a file path
+                # pylint: disable=deprecated-method
+                with pkg_resources.path(package, resource_name) as prompty_path:
+                    return load_flow(source=str(prompty_path), model=prompty_model_config)
+            except FileNotFoundError as e:
+                raise f"Flow path for {resource_name} does not exist in package {package}." from e
         return load_flow(
             source=query_response_generating_prompty,
             model=prompty_model_config,
@@ -400,26 +460,33 @@ class Simulator:
         tasks: List[Dict],
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
-        target:
+        target: Callable,
         api_call_delay_sec: float,
     ) -> List[JsonLineChatProtocol]:
         """
         Creates full conversations from query-response pairs.
 
-        :
-        :
-        :
-        :
-        :
-        :
-        :
+        :keyword query_responses: A list of query-response pairs.
+        :paramtype query_responses: List[Dict[str, str]]
+        :keyword max_conversation_turns: The maximum number of conversation turns.
+        :paramtype max_conversation_turns: int
+        :keyword tasks: A list of tasks for the simulation.
+        :paramtype tasks: List[Dict]
+        :keyword user_simulator_prompty: Path to the user simulator prompty file.
+        :paramtype user_simulator_prompty: Optional[str]
+        :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
+        :keyword target: The target function to call for responses.
+        :paramtype target: Callable
+        :keyword api_call_delay_sec: Delay in seconds between API calls.
+        :paramtype api_call_delay_sec: float
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
        """
         total_turns = len(query_responses) * max_conversation_turns
 
         progress_bar = tqdm(
-            total=int(total_turns/2),
+            total=int(total_turns / 2),
             desc="Generating: ",
             ncols=100,
             unit="message",
@@ -466,7 +533,7 @@ class Simulator:
         task: str,
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
-        target:
+        target: Callable,
         api_call_delay_sec: float,
         progress_bar: tqdm,
     ) -> List[Dict[str, str]]:
@@ -484,7 +551,7 @@ class Simulator:
         :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
         :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
         :keyword target: The target function to call for responses.
-        :paramtype target:
+        :paramtype target: Callable
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
         :keyword progress_bar: Progress bar for tracking simulation progress.
@@ -496,20 +563,23 @@ class Simulator:
         # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
         # conversation_history.add_to_history(user_turn)
 
-        while conversation_history
+        while len(conversation_history) < max_conversation_turns:
             user_flow = self._load_user_simulation_flow(
                 user_simulator_prompty=user_simulator_prompty,
                 prompty_model_config=self._build_prompty_model_config(),
                 user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
             )
             conversation_starter_from_simulated_user = user_flow(
-                task=task,
-
-
-
-
+                task=task,
+                conversation_history=[
+                    {
+                        "role": "assistant",
+                        "content": conversation_starter,
+                        "your_task": "Act as the user and translate the content into a user query.",
+                    }
+                ],
             )
-            if
+            if isinstance(conversation_starter_from_simulated_user, dict):
                 conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
             user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
             conversation_history.add_to_history(user_turn)
@@ -520,7 +590,7 @@ class Simulator:
             conversation_history.add_to_history(assistant_turn)
             progress_bar.update(1)
 
-            if conversation_history
+            if len(conversation_history) >= max_conversation_turns:
                 break
 
         return conversation_history.to_list()
@@ -536,9 +606,13 @@ class Simulator:
         Builds a response from the user simulator based on the current conversation history.
 
         :param task: A string representing the task details.
+        :type task: str
         :param conversation_history: The current conversation history as a list of dictionaries.
+        :type conversation_history: List[Dict[str, Any]]
         :param user_simulator_prompty: Path to the user simulator prompty file.
+        :type user_simulator_prompty: Optional[str]
         :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
+        :type user_simulator_prompty_kwargs: Dict[str, Any]
         :return: The generated response content from the user simulator.
         :rtype: str
         :raises RuntimeError: If an error occurs during response generation.
@@ -548,23 +622,27 @@ class Simulator:
             prompty_model_config=self._build_prompty_model_config(),
             user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
         )
-
         try:
-            response_content = user_flow(
+            response_content = user_flow(
+                task=task, conversation_history=conversation_history, **user_simulator_prompty_kwargs
+            )
             user_response = self._parse_prompty_response(response=response_content)
             return user_response["content"]
         except Exception as e:
             raise RuntimeError("Error building user simulation response") from e
 
     async def _get_target_response(
-        self, *, target:
+        self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
     ) -> str:
         """
         Retrieves the response from the target callback based on the current conversation history.
 
-        :
-        :
-        :
+        :keyword target: The target function to call for a response.
+        :paramtype target: Callable
+        :keyword api_call_delay_sec: Delay in seconds before retrieving the response.
+        :paramtype api_call_delay_sec: float
+        :keyword conversation_history: The current conversation history.
+        :paramtype conversation_history: ConversationHistory
         :return: The content of the response from the target.
         :rtype: str
         """