azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +9 -5
- azure/ai/evaluation/_common/constants.py +4 -2
- azure/ai/evaluation/_common/math.py +18 -0
- azure/ai/evaluation/_common/rai_service.py +54 -62
- azure/ai/evaluation/_common/utils.py +201 -16
- azure/ai/evaluation/_constants.py +12 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
- azure/ai/evaluation/_evaluate/_evaluate.py +161 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
- azure/ai/evaluation/_evaluate/_utils.py +44 -25
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +33 -79
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +331 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +76 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +97 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
- azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +30 -74
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +34 -80
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +35 -83
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
- azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +25 -28
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +23 -17
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +15 -90
- azure/ai/evaluation/_exceptions.py +9 -7
- azure/ai/evaluation/_http_utils.py +203 -132
- azure/ai/evaluation/_model_configurations.py +37 -9
- azure/ai/evaluation/{_evaluators/_chat/retrieval → _vendor}/__init__.py +0 -6
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
- azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -6
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
- azure/ai/evaluation/simulator/_simulator.py +127 -117
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/METADATA +129 -43
- azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt +50 -0
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD +106 -0
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/top_level.txt +0 -0
|
@@ -1,25 +1,24 @@
|
|
|
1
1
|
# flake8: noqa
|
|
2
|
-
# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611
|
|
2
|
+
# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0114,R0913,E0702,R0903,C0411
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
5
5
|
# ---------------------------------------------------------
|
|
6
6
|
import asyncio
|
|
7
7
|
import importlib.resources as pkg_resources
|
|
8
|
+
from tqdm import tqdm
|
|
8
9
|
import json
|
|
9
10
|
import os
|
|
10
11
|
import re
|
|
11
12
|
import warnings
|
|
12
13
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
14
|
+
from promptflow.core import AsyncPrompty
|
|
15
|
+
from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
|
|
16
|
+
from azure.ai.evaluation._common.utils import construct_prompty_model_config
|
|
13
17
|
|
|
14
|
-
from
|
|
15
|
-
from promptflow.core import AzureOpenAIModelConfiguration, Flow
|
|
16
|
-
from tqdm import tqdm
|
|
17
|
-
|
|
18
|
+
from .._exceptions import ErrorBlame, ErrorCategory, EvaluationException
|
|
18
19
|
from .._user_agent import USER_AGENT
|
|
19
20
|
from ._conversation.constants import ConversationRole
|
|
20
21
|
from ._helpers import ConversationHistory, Turn, experimental
|
|
21
|
-
|
|
22
|
-
# from ._tracing import monitor_task_simulator
|
|
23
22
|
from ._utils import JsonLineChatProtocol
|
|
24
23
|
|
|
25
24
|
|
|
@@ -29,43 +28,60 @@ class Simulator:
|
|
|
29
28
|
Simulator for generating synthetic conversations.
|
|
30
29
|
"""
|
|
31
30
|
|
|
32
|
-
def __init__(self,
|
|
31
|
+
def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
|
|
33
32
|
"""
|
|
34
|
-
Initializes the task simulator with
|
|
33
|
+
Initializes the task simulator with the model configuration.
|
|
35
34
|
|
|
36
|
-
:param
|
|
37
|
-
|
|
38
|
-
:
|
|
39
|
-
:paramtype credential: Optional[Any]
|
|
40
|
-
:raises ValueError: If the azure_ai_project does not contain the required keys or any value is None.
|
|
35
|
+
:param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration.
|
|
36
|
+
:type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration]
|
|
37
|
+
:raises ValueError: If the model_config does not contain the required keys or any value is None.
|
|
41
38
|
"""
|
|
42
|
-
self.
|
|
43
|
-
self.
|
|
44
|
-
|
|
45
|
-
|
|
39
|
+
self._validate_model_config(model_config)
|
|
40
|
+
self.model_config = model_config
|
|
41
|
+
if "api_version" not in self.model_config:
|
|
42
|
+
self.model_config["api_version"] = "2024-06-01" # type: ignore
|
|
46
43
|
|
|
47
44
|
@staticmethod
|
|
48
|
-
def
|
|
45
|
+
def _validate_model_config(model_config: Any):
|
|
49
46
|
"""
|
|
50
|
-
Validates the
|
|
47
|
+
Validates the model_config to ensure all required keys are present and have non-None values.
|
|
48
|
+
If 'type' is not specified, it will attempt to infer the type based on the keys present.
|
|
51
49
|
|
|
52
|
-
:param
|
|
53
|
-
:type
|
|
50
|
+
:param model_config: The model configuration dictionary.
|
|
51
|
+
:type model_config: Dict[str, Any]
|
|
54
52
|
:raises ValueError: If required keys are missing or any of the values are None.
|
|
55
53
|
"""
|
|
56
|
-
|
|
57
|
-
if not
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
54
|
+
# Attempt to infer 'type' if not provided
|
|
55
|
+
if "type" not in model_config:
|
|
56
|
+
if "azure_deployment" in model_config and "azure_endpoint" in model_config:
|
|
57
|
+
model_config["type"] = "azure_openai"
|
|
58
|
+
elif "model" in model_config:
|
|
59
|
+
model_config["type"] = "openai"
|
|
60
|
+
else:
|
|
61
|
+
raise ValueError(
|
|
62
|
+
"Unable to infer 'type' from model_config. Please specify 'type' as 'azure_openai' or 'openai'."
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
if model_config["type"] == "azure_openai":
|
|
66
|
+
required_keys = ["azure_deployment", "azure_endpoint"]
|
|
67
|
+
elif model_config["type"] == "openai":
|
|
68
|
+
required_keys = ["api_key", "model"]
|
|
69
|
+
else:
|
|
70
|
+
raise ValueError("model_config 'type' must be 'azure_openai' or 'openai'.")
|
|
71
|
+
|
|
72
|
+
missing_keys = [key for key in required_keys if key not in model_config]
|
|
73
|
+
if missing_keys:
|
|
74
|
+
raise ValueError(f"model_config is missing required keys: {', '.join(missing_keys)}")
|
|
75
|
+
none_keys = [key for key in required_keys if model_config.get(key) is None]
|
|
76
|
+
if none_keys:
|
|
77
|
+
raise ValueError(f"The following keys in model_config must not be None: {', '.join(none_keys)}")
|
|
61
78
|
|
|
62
|
-
# @monitor_task_simulator
|
|
63
79
|
async def __call__(
|
|
64
80
|
self,
|
|
65
81
|
*,
|
|
66
82
|
target: Callable,
|
|
67
83
|
max_conversation_turns: int = 5,
|
|
68
|
-
tasks: List[
|
|
84
|
+
tasks: List[str] = [],
|
|
69
85
|
text: str = "",
|
|
70
86
|
num_queries: int = 5,
|
|
71
87
|
query_response_generating_prompty: Optional[str] = None,
|
|
@@ -109,7 +125,7 @@ class Simulator:
|
|
|
109
125
|
|
|
110
126
|
Modes:
|
|
111
127
|
- Task-Free Mode: When only num_queries is specified and tasks is not, the method generates num_queries x max_conversation_turns lines of simulated data grounded in the context of the text.
|
|
112
|
-
- Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines
|
|
128
|
+
- Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines will be simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used.
|
|
113
129
|
- Conversation Starter Mode: When conversation_turns are specified, the method starts each conversation with the user-specified queries and then follows the conversation history for the remaining turns.
|
|
114
130
|
"""
|
|
115
131
|
if conversation_turns and (text or tasks):
|
|
@@ -128,8 +144,7 @@ class Simulator:
|
|
|
128
144
|
num_queries = min(num_queries, len(tasks))
|
|
129
145
|
max_conversation_turns *= 2 # account for both user and assistant turns
|
|
130
146
|
|
|
131
|
-
prompty_model_config = self.
|
|
132
|
-
|
|
147
|
+
prompty_model_config = self.model_config
|
|
133
148
|
if conversation_turns:
|
|
134
149
|
return await self._simulate_with_predefined_turns(
|
|
135
150
|
target=target,
|
|
@@ -149,7 +164,6 @@ class Simulator:
|
|
|
149
164
|
prompty_model_config=prompty_model_config,
|
|
150
165
|
**kwargs,
|
|
151
166
|
)
|
|
152
|
-
|
|
153
167
|
return await self._create_conversations_from_query_responses(
|
|
154
168
|
query_responses=query_responses,
|
|
155
169
|
max_conversation_turns=max_conversation_turns,
|
|
@@ -160,18 +174,6 @@ class Simulator:
|
|
|
160
174
|
api_call_delay_sec=api_call_delay_sec,
|
|
161
175
|
)
|
|
162
176
|
|
|
163
|
-
def _build_prompty_model_config(self) -> Dict[str, Any]:
|
|
164
|
-
"""
|
|
165
|
-
Constructs the configuration for the prompty model.
|
|
166
|
-
|
|
167
|
-
:return: A dictionary containing the prompty model configuration, including API version and user agent headers if applicable.
|
|
168
|
-
:rtype: Dict[str, Any]
|
|
169
|
-
"""
|
|
170
|
-
config = {"configuration": self.azure_ai_project}
|
|
171
|
-
if USER_AGENT and isinstance(self.azure_ai_project, AzureOpenAIModelConfiguration):
|
|
172
|
-
config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}})
|
|
173
|
-
return config
|
|
174
|
-
|
|
175
177
|
async def _simulate_with_predefined_turns(
|
|
176
178
|
self,
|
|
177
179
|
*,
|
|
@@ -181,7 +183,7 @@ class Simulator:
|
|
|
181
183
|
user_simulator_prompty: Optional[str],
|
|
182
184
|
user_simulator_prompty_kwargs: Dict[str, Any],
|
|
183
185
|
api_call_delay_sec: float,
|
|
184
|
-
prompty_model_config:
|
|
186
|
+
prompty_model_config: Any,
|
|
185
187
|
) -> List[JsonLineChatProtocol]:
|
|
186
188
|
"""
|
|
187
189
|
Simulates conversations using predefined conversation turns.
|
|
@@ -199,7 +201,7 @@ class Simulator:
|
|
|
199
201
|
:keyword api_call_delay_sec: Delay in seconds between API calls.
|
|
200
202
|
:paramtype api_call_delay_sec: float
|
|
201
203
|
:keyword prompty_model_config: The configuration for the prompty model.
|
|
202
|
-
:paramtype prompty_model_config:
|
|
204
|
+
:paramtype prompty_model_config: Any
|
|
203
205
|
:return: A list of simulated conversations represented as JsonLineChatProtocol objects.
|
|
204
206
|
:rtype: List[JsonLineChatProtocol]
|
|
205
207
|
"""
|
|
@@ -234,8 +236,16 @@ class Simulator:
|
|
|
234
236
|
target=target,
|
|
235
237
|
progress_bar=progress_bar,
|
|
236
238
|
)
|
|
237
|
-
|
|
238
|
-
|
|
239
|
+
simulated_conversations.append(
|
|
240
|
+
JsonLineChatProtocol(
|
|
241
|
+
{
|
|
242
|
+
"messages": current_simulation.to_list(),
|
|
243
|
+
"finish_reason": ["stop"],
|
|
244
|
+
"context": {},
|
|
245
|
+
"$schema": "http://azureml/sdk-2-0/ChatConversation.json",
|
|
246
|
+
}
|
|
247
|
+
)
|
|
248
|
+
)
|
|
239
249
|
|
|
240
250
|
progress_bar.close()
|
|
241
251
|
return simulated_conversations
|
|
@@ -273,14 +283,16 @@ class Simulator:
|
|
|
273
283
|
:paramtype progress_bar: tqdm,
|
|
274
284
|
"""
|
|
275
285
|
user_flow = self._load_user_simulation_flow(
|
|
276
|
-
user_simulator_prompty=user_simulator_prompty,
|
|
286
|
+
user_simulator_prompty=user_simulator_prompty, # type: ignore
|
|
277
287
|
prompty_model_config=prompty_model_config,
|
|
278
288
|
user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
|
|
279
289
|
)
|
|
280
290
|
|
|
281
291
|
while len(current_simulation) < max_conversation_turns:
|
|
282
|
-
user_response_content = user_flow(
|
|
283
|
-
task="Continue the conversation",
|
|
292
|
+
user_response_content = await user_flow(
|
|
293
|
+
task="Continue the conversation",
|
|
294
|
+
conversation_history=current_simulation.to_list(),
|
|
295
|
+
**user_simulator_prompty_kwargs,
|
|
284
296
|
)
|
|
285
297
|
user_response = self._parse_prompty_response(response=user_response_content)
|
|
286
298
|
user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
|
|
@@ -296,43 +308,60 @@ class Simulator:
|
|
|
296
308
|
def _load_user_simulation_flow(
|
|
297
309
|
self,
|
|
298
310
|
*,
|
|
299
|
-
user_simulator_prompty: Union[str, os.PathLike],
|
|
311
|
+
user_simulator_prompty: Optional[Union[str, os.PathLike]],
|
|
300
312
|
prompty_model_config: Dict[str, Any],
|
|
301
313
|
user_simulator_prompty_kwargs: Dict[str, Any],
|
|
302
|
-
) ->
|
|
314
|
+
) -> "AsyncPrompty": # type: ignore
|
|
303
315
|
"""
|
|
304
316
|
Loads the flow for simulating user interactions.
|
|
305
317
|
|
|
306
318
|
:keyword user_simulator_prompty: Path to the user simulator prompty file.
|
|
307
|
-
:paramtype user_simulator_prompty: Union[str, os.PathLike]
|
|
319
|
+
:paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]]
|
|
308
320
|
:keyword prompty_model_config: The configuration for the prompty model.
|
|
309
321
|
:paramtype prompty_model_config: Dict[str, Any]
|
|
310
322
|
:keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
|
|
311
323
|
:paramtype user_simulator_prompty_kwargs: Dict[str, Any]
|
|
312
324
|
:return: The loaded flow for simulating user interactions.
|
|
313
|
-
:rtype:
|
|
325
|
+
:rtype: AsyncPrompty
|
|
314
326
|
"""
|
|
315
327
|
if not user_simulator_prompty:
|
|
316
328
|
package = "azure.ai.evaluation.simulator._prompty"
|
|
317
329
|
resource_name = "task_simulate.prompty"
|
|
318
330
|
try:
|
|
319
331
|
# Access the resource as a file path
|
|
332
|
+
# pylint: disable=deprecated-method
|
|
320
333
|
with pkg_resources.path(package, resource_name) as prompty_path:
|
|
321
|
-
|
|
334
|
+
prompty_model_config = construct_prompty_model_config(
|
|
335
|
+
model_config=prompty_model_config, # type: ignore
|
|
336
|
+
default_api_version="2024-06-01",
|
|
337
|
+
user_agent=USER_AGENT,
|
|
338
|
+
)
|
|
339
|
+
return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore
|
|
322
340
|
except FileNotFoundError as e:
|
|
323
|
-
|
|
324
|
-
|
|
341
|
+
msg = f"Flow path for {resource_name} does not exist in package {package}."
|
|
342
|
+
raise EvaluationException(
|
|
343
|
+
message=msg,
|
|
344
|
+
internal_message=msg,
|
|
345
|
+
error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
|
|
346
|
+
blame=ErrorBlame.USER_ERROR,
|
|
347
|
+
) from e
|
|
348
|
+
prompty_model_config = construct_prompty_model_config(
|
|
349
|
+
model_config=prompty_model_config, # type: ignore
|
|
350
|
+
default_api_version="2024-06-01",
|
|
351
|
+
user_agent=USER_AGENT,
|
|
352
|
+
)
|
|
353
|
+
return AsyncPrompty.load(
|
|
325
354
|
source=user_simulator_prompty,
|
|
326
355
|
model=prompty_model_config,
|
|
327
356
|
**user_simulator_prompty_kwargs,
|
|
328
|
-
)
|
|
357
|
+
) # type: ignore
|
|
329
358
|
|
|
330
359
|
def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]:
|
|
331
360
|
"""
|
|
332
361
|
Parses the response from the prompty execution.
|
|
333
362
|
|
|
334
363
|
:keyword response: The raw response from the prompty.
|
|
335
|
-
:paramtype
|
|
364
|
+
:paramtype response: str
|
|
336
365
|
:return: A dictionary representing the parsed response content.
|
|
337
366
|
:rtype: Dict[str, Any]
|
|
338
367
|
:raises ValueError: If the response cannot be parsed.
|
|
@@ -373,7 +402,7 @@ class Simulator:
|
|
|
373
402
|
num_queries: int,
|
|
374
403
|
query_response_generating_prompty: Optional[str],
|
|
375
404
|
query_response_generating_prompty_kwargs: Dict[str, Any],
|
|
376
|
-
prompty_model_config:
|
|
405
|
+
prompty_model_config: Any,
|
|
377
406
|
**kwargs,
|
|
378
407
|
) -> List[Dict[str, str]]:
|
|
379
408
|
"""
|
|
@@ -388,19 +417,18 @@ class Simulator:
|
|
|
388
417
|
:keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
|
|
389
418
|
:paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
|
|
390
419
|
:keyword prompty_model_config: The configuration for the prompty model.
|
|
391
|
-
:paramtype prompty_model_config:
|
|
420
|
+
:paramtype prompty_model_config: Any
|
|
392
421
|
:return: A list of query-response dictionaries.
|
|
393
422
|
:rtype: List[Dict[str, str]]
|
|
394
423
|
:raises RuntimeError: If an error occurs during query generation.
|
|
395
424
|
"""
|
|
396
425
|
query_flow = self._load_query_generation_flow(
|
|
397
|
-
query_response_generating_prompty=query_response_generating_prompty,
|
|
426
|
+
query_response_generating_prompty=query_response_generating_prompty, # type: ignore
|
|
398
427
|
prompty_model_config=prompty_model_config,
|
|
399
428
|
query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
|
|
400
429
|
)
|
|
401
|
-
|
|
402
430
|
try:
|
|
403
|
-
query_responses = query_flow(text=text, num_queries=num_queries)
|
|
431
|
+
query_responses = await query_flow(text=text, num_queries=num_queries)
|
|
404
432
|
if isinstance(query_responses, dict):
|
|
405
433
|
keys = list(query_responses.keys())
|
|
406
434
|
return query_responses[keys[0]]
|
|
@@ -411,43 +439,60 @@ class Simulator:
|
|
|
411
439
|
def _load_query_generation_flow(
|
|
412
440
|
self,
|
|
413
441
|
*,
|
|
414
|
-
query_response_generating_prompty: Union[str, os.PathLike],
|
|
442
|
+
query_response_generating_prompty: Optional[Union[str, os.PathLike]],
|
|
415
443
|
prompty_model_config: Dict[str, Any],
|
|
416
444
|
query_response_generating_prompty_kwargs: Dict[str, Any],
|
|
417
|
-
) ->
|
|
445
|
+
) -> "AsyncPrompty":
|
|
418
446
|
"""
|
|
419
447
|
Loads the flow for generating query responses.
|
|
420
448
|
|
|
421
449
|
:keyword query_response_generating_prompty: Path to the query response generating prompty file.
|
|
422
|
-
:paramtype query_response_generating_prompty: Union[str, os.PathLike]
|
|
450
|
+
:paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]]
|
|
423
451
|
:keyword prompty_model_config: The configuration for the prompty model.
|
|
424
452
|
:paramtype prompty_model_config: Dict[str, Any]
|
|
425
453
|
:keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
|
|
426
454
|
:paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
|
|
427
455
|
:return: The loaded flow for generating query responses.
|
|
428
|
-
:rtype:
|
|
456
|
+
:rtype: AsyncPrompty
|
|
429
457
|
"""
|
|
430
458
|
if not query_response_generating_prompty:
|
|
431
459
|
package = "azure.ai.evaluation.simulator._prompty"
|
|
432
460
|
resource_name = "task_query_response.prompty"
|
|
433
461
|
try:
|
|
434
462
|
# Access the resource as a file path
|
|
463
|
+
# pylint: disable=deprecated-method
|
|
435
464
|
with pkg_resources.path(package, resource_name) as prompty_path:
|
|
436
|
-
|
|
465
|
+
prompty_model_config = construct_prompty_model_config(
|
|
466
|
+
model_config=prompty_model_config, # type: ignore
|
|
467
|
+
default_api_version="2024-06-01",
|
|
468
|
+
user_agent=USER_AGENT,
|
|
469
|
+
)
|
|
470
|
+
return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore
|
|
437
471
|
except FileNotFoundError as e:
|
|
438
|
-
|
|
439
|
-
|
|
472
|
+
msg = f"Flow path for {resource_name} does not exist in package {package}."
|
|
473
|
+
raise EvaluationException(
|
|
474
|
+
message=msg,
|
|
475
|
+
internal_message=msg,
|
|
476
|
+
error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
|
|
477
|
+
blame=ErrorBlame.USER_ERROR,
|
|
478
|
+
) from e
|
|
479
|
+
prompty_model_config = construct_prompty_model_config(
|
|
480
|
+
model_config=prompty_model_config, # type: ignore
|
|
481
|
+
default_api_version="2024-06-01",
|
|
482
|
+
user_agent=USER_AGENT,
|
|
483
|
+
)
|
|
484
|
+
return AsyncPrompty.load(
|
|
440
485
|
source=query_response_generating_prompty,
|
|
441
486
|
model=prompty_model_config,
|
|
442
487
|
**query_response_generating_prompty_kwargs,
|
|
443
|
-
)
|
|
488
|
+
) # type: ignore
|
|
444
489
|
|
|
445
490
|
async def _create_conversations_from_query_responses(
|
|
446
491
|
self,
|
|
447
492
|
*,
|
|
448
493
|
query_responses: List[Dict[str, str]],
|
|
449
494
|
max_conversation_turns: int,
|
|
450
|
-
tasks: List[
|
|
495
|
+
tasks: List[str],
|
|
451
496
|
user_simulator_prompty: Optional[str],
|
|
452
497
|
user_simulator_prompty_kwargs: Dict[str, Any],
|
|
453
498
|
target: Callable,
|
|
@@ -461,7 +506,7 @@ class Simulator:
|
|
|
461
506
|
:keyword max_conversation_turns: The maximum number of conversation turns.
|
|
462
507
|
:paramtype max_conversation_turns: int
|
|
463
508
|
:keyword tasks: A list of tasks for the simulation.
|
|
464
|
-
:paramtype tasks: List[
|
|
509
|
+
:paramtype tasks: List[str]
|
|
465
510
|
:keyword user_simulator_prompty: Path to the user simulator prompty file.
|
|
466
511
|
:paramtype user_simulator_prompty: Optional[str]
|
|
467
512
|
:keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
|
|
@@ -491,7 +536,7 @@ class Simulator:
|
|
|
491
536
|
conversation = await self._complete_conversation(
|
|
492
537
|
conversation_starter=query,
|
|
493
538
|
max_conversation_turns=max_conversation_turns,
|
|
494
|
-
task=task,
|
|
539
|
+
task=task, # type: ignore
|
|
495
540
|
user_simulator_prompty=user_simulator_prompty,
|
|
496
541
|
user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
|
|
497
542
|
target=target,
|
|
@@ -526,7 +571,7 @@ class Simulator:
|
|
|
526
571
|
target: Callable,
|
|
527
572
|
api_call_delay_sec: float,
|
|
528
573
|
progress_bar: tqdm,
|
|
529
|
-
) -> List[Dict[str, str]]:
|
|
574
|
+
) -> List[Dict[str, Optional[str]]]:
|
|
530
575
|
"""
|
|
531
576
|
Completes a conversation with the target model based on the conversation starter.
|
|
532
577
|
|
|
@@ -547,7 +592,7 @@ class Simulator:
|
|
|
547
592
|
:keyword progress_bar: Progress bar for tracking simulation progress.
|
|
548
593
|
:paramtype progress_bar: tqdm
|
|
549
594
|
:return: A list representing the conversation history with each turn's content.
|
|
550
|
-
:rtype: List[Dict[str, str]]
|
|
595
|
+
:rtype: List[Dict[str, Optional[str]]]
|
|
551
596
|
"""
|
|
552
597
|
conversation_history = ConversationHistory()
|
|
553
598
|
# user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
|
|
@@ -555,11 +600,11 @@ class Simulator:
|
|
|
555
600
|
|
|
556
601
|
while len(conversation_history) < max_conversation_turns:
|
|
557
602
|
user_flow = self._load_user_simulation_flow(
|
|
558
|
-
user_simulator_prompty=user_simulator_prompty,
|
|
559
|
-
prompty_model_config=self.
|
|
603
|
+
user_simulator_prompty=user_simulator_prompty, # type: ignore
|
|
604
|
+
prompty_model_config=self.model_config, # type: ignore
|
|
560
605
|
user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
|
|
561
606
|
)
|
|
562
|
-
conversation_starter_from_simulated_user = user_flow(
|
|
607
|
+
conversation_starter_from_simulated_user = await user_flow(
|
|
563
608
|
task=task,
|
|
564
609
|
conversation_history=[
|
|
565
610
|
{
|
|
@@ -585,41 +630,6 @@ class Simulator:
|
|
|
585
630
|
|
|
586
631
|
return conversation_history.to_list()
|
|
587
632
|
|
|
588
|
-
async def _build_user_simulation_response(
|
|
589
|
-
self,
|
|
590
|
-
task: str,
|
|
591
|
-
conversation_history: List[Dict[str, Any]],
|
|
592
|
-
user_simulator_prompty: Optional[str],
|
|
593
|
-
user_simulator_prompty_kwargs: Dict[str, Any],
|
|
594
|
-
) -> str:
|
|
595
|
-
"""
|
|
596
|
-
Builds a response from the user simulator based on the current conversation history.
|
|
597
|
-
|
|
598
|
-
:param task: A string representing the task details.
|
|
599
|
-
:type task: str
|
|
600
|
-
:param conversation_history: The current conversation history as a list of dictionaries.
|
|
601
|
-
:type conversation_history: List[Dict[str, Any]]
|
|
602
|
-
:param user_simulator_prompty: Path to the user simulator prompty file.
|
|
603
|
-
:type user_simulator_prompty: Optional[str]
|
|
604
|
-
:param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
|
|
605
|
-
:type user_simulator_prompty_kwargs: Dict[str, Any]
|
|
606
|
-
:return: The generated response content from the user simulator.
|
|
607
|
-
:rtype: str
|
|
608
|
-
:raises RuntimeError: If an error occurs during response generation.
|
|
609
|
-
"""
|
|
610
|
-
user_flow = self._load_user_simulation_flow(
|
|
611
|
-
user_simulator_prompty=user_simulator_prompty,
|
|
612
|
-
prompty_model_config=self._build_prompty_model_config(),
|
|
613
|
-
user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
|
|
614
|
-
)
|
|
615
|
-
|
|
616
|
-
try:
|
|
617
|
-
response_content = user_flow(task=task, conversation_history=conversation_history)
|
|
618
|
-
user_response = self._parse_prompty_response(response=response_content)
|
|
619
|
-
return user_response["content"]
|
|
620
|
-
except Exception as e:
|
|
621
|
-
raise RuntimeError("Error building user simulation response") from e
|
|
622
|
-
|
|
623
633
|
async def _get_target_response(
|
|
624
634
|
self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
|
|
625
635
|
) -> str:
|
|
@@ -69,16 +69,16 @@ def monitor_task_simulator(func: Callable[P, R]) -> Callable[P, R]:
|
|
|
69
69
|
|
|
70
70
|
@functools.wraps(func)
|
|
71
71
|
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
|
72
|
-
|
|
73
|
-
|
|
72
|
+
text = kwargs.get("text")
|
|
73
|
+
user_persona = kwargs.get("user_persona")
|
|
74
74
|
num_queries = kwargs.get("num_queries", 0)
|
|
75
75
|
max_conversation_turns = kwargs.get("max_conversation_turns", 0)
|
|
76
76
|
decorated_func = monitor_operation(
|
|
77
77
|
activity_name="task.simulator.call",
|
|
78
78
|
activity_type=ActivityType.PUBLICAPI,
|
|
79
79
|
custom_dimensions={
|
|
80
|
-
"text_length":
|
|
81
|
-
"user_persona_length":
|
|
80
|
+
"text_length": len(text) if isinstance(text, str) else 0,
|
|
81
|
+
"user_persona_length": len(user_persona) if isinstance(user_persona, list) else 0,
|
|
82
82
|
"number_of_queries": num_queries,
|
|
83
83
|
"max_conversation_turns": max_conversation_turns,
|
|
84
84
|
},
|