azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (76) hide show
  1. azure/ai/evaluation/__init__.py +4 -4
  2. azure/ai/evaluation/_common/rai_service.py +4 -4
  3. azure/ai/evaluation/_common/utils.py +40 -25
  4. azure/ai/evaluation/_constants.py +13 -0
  5. azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
  6. azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
  7. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
  8. azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
  9. azure/ai/evaluation/_evaluate/_evaluate.py +88 -63
  10. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
  11. azure/ai/evaluation/_evaluate/_utils.py +29 -22
  12. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  13. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +34 -86
  14. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
  15. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  16. azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
  17. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
  18. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
  19. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  20. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
  21. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
  22. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
  23. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
  24. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
  25. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
  26. azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
  27. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +29 -79
  29. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
  30. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  31. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +33 -85
  32. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
  33. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
  34. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
  35. azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
  36. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +34 -88
  37. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
  38. azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
  39. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +17 -29
  40. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
  41. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
  42. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -18
  43. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  44. azure/ai/evaluation/_evaluators/_xpia/xpia.py +16 -91
  45. azure/ai/evaluation/_exceptions.py +0 -1
  46. azure/ai/evaluation/_http_utils.py +3 -3
  47. azure/ai/evaluation/_model_configurations.py +36 -8
  48. azure/ai/evaluation/_version.py +1 -1
  49. azure/ai/evaluation/simulator/__init__.py +1 -1
  50. azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
  51. azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  52. azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
  53. azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
  54. azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
  55. azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
  56. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
  57. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
  58. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
  59. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
  60. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  61. azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
  62. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  63. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
  64. azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +166 -88
  65. azure/ai/evaluation/simulator/_tracing.py +21 -24
  66. azure/ai/evaluation/simulator/_utils.py +4 -1
  67. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/METADATA +144 -14
  68. azure_ai_evaluation-1.0.0b3.dist-info/RECORD +98 -0
  69. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -350
  70. azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
  71. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -66
  72. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  73. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  74. azure_ai_evaluation-1.0.0b1.dist-info/RECORD +0 -97
  75. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL +0 -0
  76. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,10 @@ import time
8
8
  import uuid
9
9
  from typing import Dict, List
10
10
 
11
- from azure.core.exceptions import HttpResponseError
12
- from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
13
-
14
11
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
15
12
  from azure.ai.evaluation._user_agent import USER_AGENT
13
+ from azure.core.exceptions import HttpResponseError
14
+ from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
16
15
 
17
16
  from .models import OpenAIChatCompletionsModel
18
17
 
@@ -2,15 +2,14 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
  import os
5
- from typing import Any, Dict
5
+ from typing import Any
6
6
  from urllib.parse import urljoin, urlparse
7
7
 
8
- from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
9
-
8
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
10
9
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
11
- from azure.ai.evaluation._user_agent import USER_AGENT
12
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
13
10
  from azure.ai.evaluation._model_configurations import AzureAIProject
11
+ from azure.ai.evaluation._user_agent import USER_AGENT
12
+ from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
14
13
 
15
14
  from ._identity_manager import APITokenManager
16
15
 
@@ -21,7 +20,7 @@ if "RAI_SVC_URL" in os.environ:
21
20
  print(f"Found RAI_SVC_URL in environment variable, using {api_url} for the service endpoint.")
22
21
 
23
22
 
24
- class RAIClient:
23
+ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
25
24
  """Client for the Responsible AI Service
26
25
 
27
26
  :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
@@ -31,7 +30,9 @@ class RAIClient:
31
30
  :type token_manager: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
32
31
  """
33
32
 
34
- def __init__(self, azure_ai_project: AzureAIProject, token_manager: APITokenManager) -> None:
33
+ def __init__( # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
34
+ self, azure_ai_project: AzureAIProject, token_manager: APITokenManager
35
+ ) -> None:
35
36
  self.azure_ai_project = azure_ai_project
36
37
  self.token_manager = token_manager
37
38
 
@@ -73,7 +74,7 @@ class RAIClient:
73
74
  timeout=5,
74
75
  )
75
76
  if response.status_code != 200:
76
- msg = f"Failed to retrieve the discovery service URL."
77
+ msg = "Failed to retrieve the discovery service URL."
77
78
  raise EvaluationException(
78
79
  message=msg,
79
80
  internal_message=msg,
@@ -104,7 +105,11 @@ class RAIClient:
104
105
  return self.contentharm_parameters
105
106
 
106
107
  async def get_jailbreaks_dataset(self, type: str) -> Any:
107
- "Get the jailbreaks dataset, if exists"
108
+ """Get the jailbreaks dataset, if exists
109
+
110
+ :param type: The dataset type. Should be one of 'xpia' or 'upia'
111
+ :type type: str
112
+ """
108
113
  if self.jailbreaks_dataset is None:
109
114
  if type == "xpia":
110
115
  self.jailbreaks_dataset = await self.get(self.xpia_jailbreaks_json_endpoint)
@@ -146,8 +151,10 @@ class RAIClient:
146
151
  if response.status_code == 200:
147
152
  return response.json()
148
153
 
149
- msg = "Azure safety evaluation service is not available in your current region, "
150
- "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
154
+ msg = (
155
+ "Azure safety evaluation service is not available in your current region, "
156
+ + "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
157
+ )
151
158
  raise EvaluationException(
152
159
  message=msg,
153
160
  internal_message=msg,
@@ -2,7 +2,7 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- from typing import Any, Dict, Optional
5
+ from typing import Optional
6
6
 
7
7
  from azure.ai.evaluation._model_configurations import AzureAIProject
8
8
 
@@ -12,10 +12,9 @@ from abc import ABC, abstractmethod
12
12
  from collections import deque
13
13
  from typing import Deque, Dict, List, Optional, Union
14
14
  from urllib.parse import urlparse
15
- import ast
16
15
 
16
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
17
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
18
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
19
18
 
20
19
  from ._identity_manager import APITokenManager
21
20
 
@@ -29,16 +28,15 @@ def get_model_class_from_url(endpoint_url: str):
29
28
 
30
29
  if endpoint_path.endswith("chat/completions"):
31
30
  return OpenAIChatCompletionsModel
32
- elif endpoint_path.endswith("completions"):
31
+ if endpoint_path.endswith("completions"):
33
32
  return OpenAICompletionsModel
34
- else:
35
- raise EvaluationException(
36
- message=f"Unknown API type for endpoint {endpoint_url}",
37
- internal_message="Unknown API type",
38
- error_category=ErrorCategory.UNKNOWN_FIELD,
39
- error_blame=ErrorBlame.USER_ERROR,
40
- error_target=ErrorTarget.MODELS,
41
- )
33
+ raise EvaluationException(
34
+ message=f"Unknown API type for endpoint {endpoint_url}",
35
+ internal_message="Unknown API type",
36
+ error_category=ErrorCategory.UNKNOWN_FIELD,
37
+ error_blame=ErrorBlame.USER_ERROR,
38
+ error_target=ErrorTarget.MODELS,
39
+ )
42
40
 
43
41
 
44
42
  # ===========================================================
File without changes
@@ -33,7 +33,8 @@ Answer must not be more than 5 words
33
33
  Answer must be picked from Text as is
34
34
  Question should be as descriptive as possible and must include as much context as possible from Text
35
35
  Output must always have the provided number of QnAs
36
- Output must be in JSON format
36
+ Output must be in JSON format.
37
+ Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
37
38
  Text:
38
39
  <|text_start|>
39
40
  On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
@@ -3,25 +3,27 @@
3
3
  # ---------------------------------------------------------
4
4
  # Copyright (c) Microsoft Corporation. All rights reserved.
5
5
  # ---------------------------------------------------------
6
- import re
7
6
  import asyncio
7
+ import importlib.resources as pkg_resources
8
8
  import json
9
9
  import os
10
- from typing import Any, Dict, List, Optional
10
+ import re
11
11
  import warnings
12
-
13
- from tqdm import tqdm
12
+ from typing import Any, Callable, Dict, List, Optional, Union
14
13
 
15
14
  from promptflow.client import load_flow
16
- from promptflow.core import AzureOpenAIModelConfiguration
15
+ from promptflow.core import AzureOpenAIModelConfiguration, Flow
16
+ from tqdm import tqdm
17
17
 
18
18
  from .._user_agent import USER_AGENT
19
19
  from ._conversation.constants import ConversationRole
20
- from ._helpers import ConversationHistory, Turn
20
+ from ._helpers import ConversationHistory, Turn, experimental
21
+
21
22
  # from ._tracing import monitor_task_simulator
22
23
  from ._utils import JsonLineChatProtocol
23
24
 
24
25
 
26
+ @experimental
25
27
  class Simulator:
26
28
  """
27
29
  Simulator for generating synthetic conversations.
@@ -39,7 +41,7 @@ class Simulator:
39
41
  """
40
42
  self._validate_project_config(azure_ai_project)
41
43
  self.azure_ai_project = azure_ai_project
42
- self.azure_ai_project["api_version"] = "2024-02-15-preview"
44
+ self.azure_ai_project["api_version"] = "2024-06-01"
43
45
  self.credential = credential
44
46
 
45
47
  @staticmethod
@@ -48,6 +50,7 @@ class Simulator:
48
50
  Validates the azure_ai_project configuration to ensure all required keys are present and have non-None values.
49
51
 
50
52
  :param azure_ai_project: The Azure AI project configuration dictionary.
53
+ :type azure_ai_project: Dict[str, Any]
51
54
  :raises ValueError: If required keys are missing or any of the values are None.
52
55
  """
53
56
  required_keys = ["subscription_id", "resource_group_name", "project_name"]
@@ -60,7 +63,7 @@ class Simulator:
60
63
  async def __call__(
61
64
  self,
62
65
  *,
63
- target: callable,
66
+ target: Callable,
64
67
  max_conversation_turns: int = 5,
65
68
  tasks: List[Dict] = [],
66
69
  text: str = "",
@@ -77,7 +80,7 @@ class Simulator:
77
80
  Generates synthetic conversations based on provided parameters.
78
81
 
79
82
  :keyword target: The target function to call during the simulation.
80
- :paramtype target: callable
83
+ :paramtype target: Callable
81
84
  :keyword max_conversation_turns: Maximum number of conversation turns for the simulation. Each turn consists of a user and an assistant message.
82
85
  :paramtype max_conversation_turns: int
83
86
  :keyword tasks: A list of user tasks, each represented as a list of strings. Text should be relevant for the tasks and facilitate the simulation. One example is to use text to provide context for the tasks.
@@ -126,7 +129,6 @@ class Simulator:
126
129
  max_conversation_turns *= 2 # account for both user and assistant turns
127
130
 
128
131
  prompty_model_config = self._build_prompty_model_config()
129
-
130
132
  if conversation_turns:
131
133
  return await self._simulate_with_predefined_turns(
132
134
  target=target,
@@ -172,7 +174,7 @@ class Simulator:
172
174
  async def _simulate_with_predefined_turns(
173
175
  self,
174
176
  *,
175
- target: callable,
177
+ target: Callable,
176
178
  max_conversation_turns: int,
177
179
  conversation_turns: List[List[str]],
178
180
  user_simulator_prompty: Optional[str],
@@ -183,19 +185,26 @@ class Simulator:
183
185
  """
184
186
  Simulates conversations using predefined conversation turns.
185
187
 
186
- :param target: The target function to call during each turn of the simulation.
187
- :param max_conversation_turns: Maximum number of turns for the simulation.
188
- :param conversation_turns: A list of predefined conversation turns.
189
- :param user_simulator_prompty: Path to the user simulator prompty file.
190
- :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
191
- :param api_call_delay_sec: Delay in seconds between API calls.
192
- :param prompty_model_config: The configuration for the prompty model.
188
+ :keyword target: The target function to call during each turn of the simulation.
189
+ :paramtype target: Callable
190
+ :keyword max_conversation_turns: Maximum number of turns for the simulation.
191
+ :paramtype max_conversation_turns: int
192
+ :keyword conversation_turns: A list of predefined conversation turns.
193
+ :paramtype conversation_turns: List[List[str]]
194
+ :keyword user_simulator_prompty: Path to the user simulator prompty file.
195
+ :paramtype user_simulator_prompty: Optional[str]
196
+ :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
197
+ :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
198
+ :keyword api_call_delay_sec: Delay in seconds between API calls.
199
+ :paramtype api_call_delay_sec: float
200
+ :keyword prompty_model_config: The configuration for the prompty model.
201
+ :paramtype prompty_model_config: Dict[str, Any]
193
202
  :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
194
203
  :rtype: List[JsonLineChatProtocol]
195
204
  """
196
205
  simulated_conversations = []
197
206
  progress_bar = tqdm(
198
- total=int(len(conversation_turns) * (max_conversation_turns/2)),
207
+ total=int(len(conversation_turns) * (max_conversation_turns / 2)),
199
208
  desc="Simulating with predefined conversation turns: ",
200
209
  ncols=100,
201
210
  unit="messages",
@@ -213,7 +222,7 @@ class Simulator:
213
222
  current_simulation.add_to_history(assistant_turn)
214
223
  progress_bar.update(1) # Update progress bar for both user and assistant turns
215
224
 
216
- if current_simulation.get_length() < max_conversation_turns:
225
+ if len(current_simulation) < max_conversation_turns:
217
226
  await self._extend_conversation_with_simulator(
218
227
  current_simulation=current_simulation,
219
228
  max_conversation_turns=max_conversation_turns,
@@ -224,8 +233,16 @@ class Simulator:
224
233
  target=target,
225
234
  progress_bar=progress_bar,
226
235
  )
227
-
228
- simulated_conversations.append(current_simulation.to_list())
236
+ simulated_conversations.append(
237
+ JsonLineChatProtocol(
238
+ {
239
+ "messages": current_simulation.to_list(),
240
+ "finish_reason": ["stop"],
241
+ "context": {},
242
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
243
+ }
244
+ )
245
+ )
229
246
 
230
247
  progress_bar.close()
231
248
  return simulated_conversations
@@ -239,20 +256,28 @@ class Simulator:
239
256
  user_simulator_prompty_kwargs: Dict[str, Any],
240
257
  api_call_delay_sec: float,
241
258
  prompty_model_config: Dict[str, Any],
242
- target: callable,
259
+ target: Callable,
243
260
  progress_bar: tqdm,
244
261
  ):
245
262
  """
246
263
  Extends an ongoing conversation using a user simulator until the maximum number of turns is reached.
247
264
 
248
- :param current_simulation: The current state of the conversation history.
249
- :param max_conversation_turns: The maximum number of conversation turns.
250
- :param user_simulator_prompty: Path to the user simulator prompty file.
251
- :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
252
- :param api_call_delay_sec: Delay in seconds between API calls.
253
- :param prompty_model_config: The configuration for the prompty model.
254
- :param target: The target function to call for responses.
255
- :param progress_bar: Progress bar for tracking simulation progress.
265
+ :keyword current_simulation: The current state of the conversation history.
266
+ :paramtype current_simulation: ConversationHistory
267
+ :keyword max_conversation_turns: The maximum number of conversation turns.
268
+ :paramtype max_conversation_turns: int
269
+ :keyword user_simulator_prompty: Path to the user simulator prompty file.
270
+ :paramtype user_simulator_prompty: Optional[str]
271
+ :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
272
+ :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
273
+ :keyword api_call_delay_sec: Delay in seconds between API calls.
274
+ :paramtype api_call_delay_sec: float
275
+ :keyword prompty_model_config: The configuration for the prompty model.
276
+ :paramtype prompty_model_config: Dict[str, Any]
277
+ :keyword target: The target function to call for responses.
278
+ :paramtype target: Callable
279
+ :keyword progress_bar: Progress bar for tracking simulation progress.
280
+ :paramtype progress_bar: tqdm
256
281
  """
257
282
  user_flow = self._load_user_simulation_flow(
258
283
  user_simulator_prompty=user_simulator_prompty,
@@ -260,9 +285,11 @@ class Simulator:
260
285
  user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
261
286
  )
262
287
 
263
- while current_simulation.get_length() < max_conversation_turns:
288
+ while len(current_simulation) < max_conversation_turns:
264
289
  user_response_content = user_flow(
265
- task="Continue the conversation", conversation_history=current_simulation.to_list()
290
+ task="Continue the conversation",
291
+ conversation_history=current_simulation.to_list(),
292
+ **user_simulator_prompty_kwargs,
266
293
  )
267
294
  user_response = self._parse_prompty_response(response=user_response_content)
268
295
  user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
@@ -276,20 +303,34 @@ class Simulator:
276
303
  progress_bar.update(1)
277
304
 
278
305
  def _load_user_simulation_flow(
279
- self, *, user_simulator_prompty, prompty_model_config, user_simulator_prompty_kwargs
280
- ):
306
+ self,
307
+ *,
308
+ user_simulator_prompty: Union[str, os.PathLike],
309
+ prompty_model_config: Dict[str, Any],
310
+ user_simulator_prompty_kwargs: Dict[str, Any],
311
+ ) -> Flow:
281
312
  """
282
313
  Loads the flow for simulating user interactions.
283
314
 
284
- :param user_simulator_prompty: Path to the user simulator prompty file.
285
- :param prompty_model_config: The configuration for the prompty model.
286
- :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
315
+ :keyword user_simulator_prompty: Path to the user simulator prompty file.
316
+ :paramtype user_simulator_prompty: Union[str, os.PathLike]
317
+ :keyword prompty_model_config: The configuration for the prompty model.
318
+ :paramtype prompty_model_config: Dict[str, Any]
319
+ :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
320
+ :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
287
321
  :return: The loaded flow for simulating user interactions.
322
+ :rtype: Flow
288
323
  """
289
324
  if not user_simulator_prompty:
290
- current_dir = os.path.dirname(__file__)
291
- prompty_path = os.path.join(current_dir, "_prompty", "task_simulate.prompty")
292
- return load_flow(source=prompty_path, model=prompty_model_config)
325
+ package = "azure.ai.evaluation.simulator._prompty"
326
+ resource_name = "task_simulate.prompty"
327
+ try:
328
+ # Access the resource as a file path
329
+ # pylint: disable=deprecated-method
330
+ with pkg_resources.path(package, resource_name) as prompty_path:
331
+ return load_flow(source=str(prompty_path), model=prompty_model_config)
332
+ except FileNotFoundError as e:
333
+ raise f"Flow path for {resource_name} does not exist in package {package}." from e
293
334
  return load_flow(
294
335
  source=user_simulator_prompty,
295
336
  model=prompty_model_config,
@@ -300,19 +341,20 @@ class Simulator:
300
341
  """
301
342
  Parses the response from the prompty execution.
302
343
 
303
- :param response: The raw response from the prompty.
344
+ :keyword response: The raw response from the prompty.
345
+ :paramtype response: str
304
346
  :return: A dictionary representing the parsed response content.
305
347
  :rtype: Dict[str, Any]
306
348
  :raises ValueError: If the response cannot be parsed.
307
349
  """
308
350
  try:
309
- if type(response) == str:
310
- response = response.replace('\u2019', "'").replace('\u2018', "'")
311
- response = response.replace('\u201C', '"').replace('\u201D', '"')
312
-
351
+ if isinstance(response, str):
352
+ response = response.replace("\u2019", "'").replace("\u2018", "'")
353
+ response = response.replace("\u201C", '"').replace("\u201D", '"')
354
+
313
355
  # Replace None with null
314
- response = response.replace('None', 'null')
315
-
356
+ response = response.replace("None", "null")
357
+
316
358
  # Escape unescaped single quotes inside string values
317
359
  def escape_single_quotes(match):
318
360
  s = match.group(0)
@@ -347,11 +389,16 @@ class Simulator:
347
389
  """
348
390
  Generates query responses using the specified prompty configuration.
349
391
 
350
- :param text: The input text for generating queries.
351
- :param num_queries: The number of queries to generate.
352
- :param query_response_generating_prompty: Path to the query response generating prompty file.
353
- :param query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
354
- :param prompty_model_config: The configuration for the prompty model.
392
+ :keyword text: The input text for generating queries.
393
+ :paramtype text: str
394
+ :keyword num_queries: The number of queries to generate.
395
+ :paramtype num_queries: int
396
+ :keyword query_response_generating_prompty: Path to the query response generating prompty file.
397
+ :paramtype query_response_generating_prompty: Optional[str]
398
+ :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
399
+ :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
400
+ :keyword prompty_model_config: The configuration for the prompty model.
401
+ :paramtype prompty_model_config: Dict[str, Any]
355
402
  :return: A list of query-response dictionaries.
356
403
  :rtype: List[Dict[str, str]]
357
404
  :raises RuntimeError: If an error occurs during query generation.
@@ -361,10 +408,9 @@ class Simulator:
361
408
  prompty_model_config=prompty_model_config,
362
409
  query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
363
410
  )
364
-
365
411
  try:
366
412
  query_responses = query_flow(text=text, num_queries=num_queries)
367
- if type(query_responses) == dict:
413
+ if isinstance(query_responses, dict):
368
414
  keys = list(query_responses.keys())
369
415
  return query_responses[keys[0]]
370
416
  return json.loads(query_responses)
@@ -372,20 +418,34 @@ class Simulator:
372
418
  raise RuntimeError("Error generating query responses") from e
373
419
 
374
420
  def _load_query_generation_flow(
375
- self, *, query_response_generating_prompty, prompty_model_config, query_response_generating_prompty_kwargs
376
- ):
421
+ self,
422
+ *,
423
+ query_response_generating_prompty: Union[str, os.PathLike],
424
+ prompty_model_config: Dict[str, Any],
425
+ query_response_generating_prompty_kwargs: Dict[str, Any],
426
+ ) -> Flow:
377
427
  """
378
428
  Loads the flow for generating query responses.
379
429
 
380
- :param query_response_generating_prompty: Path to the query response generating prompty file.
381
- :param prompty_model_config: The configuration for the prompty model.
382
- :param query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
430
+ :keyword query_response_generating_prompty: Path to the query response generating prompty file.
431
+ :paramtype query_response_generating_prompty: Union[str, os.PathLike]
432
+ :keyword prompty_model_config: The configuration for the prompty model.
433
+ :paramtype prompty_model_config: Dict[str, Any]
434
+ :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
435
+ :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
383
436
  :return: The loaded flow for generating query responses.
437
+ :rtype: Flow
384
438
  """
385
439
  if not query_response_generating_prompty:
386
- current_dir = os.path.dirname(__file__)
387
- prompty_path = os.path.join(current_dir, "_prompty", "task_query_response.prompty")
388
- return load_flow(source=prompty_path, model=prompty_model_config)
440
+ package = "azure.ai.evaluation.simulator._prompty"
441
+ resource_name = "task_query_response.prompty"
442
+ try:
443
+ # Access the resource as a file path
444
+ # pylint: disable=deprecated-method
445
+ with pkg_resources.path(package, resource_name) as prompty_path:
446
+ return load_flow(source=str(prompty_path), model=prompty_model_config)
447
+ except FileNotFoundError as e:
448
+ raise f"Flow path for {resource_name} does not exist in package {package}." from e
389
449
  return load_flow(
390
450
  source=query_response_generating_prompty,
391
451
  model=prompty_model_config,
@@ -400,26 +460,33 @@ class Simulator:
400
460
  tasks: List[Dict],
401
461
  user_simulator_prompty: Optional[str],
402
462
  user_simulator_prompty_kwargs: Dict[str, Any],
403
- target: callable,
463
+ target: Callable,
404
464
  api_call_delay_sec: float,
405
465
  ) -> List[JsonLineChatProtocol]:
406
466
  """
407
467
  Creates full conversations from query-response pairs.
408
468
 
409
- :param query_responses: A list of query-response pairs.
410
- :param max_conversation_turns: The maximum number of conversation turns.
411
- :param tasks: A list of tasks for the simulation.
412
- :param user_simulator_prompty: Path to the user simulator prompty file.
413
- :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
414
- :param target: The target function to call for responses.
415
- :param api_call_delay_sec: Delay in seconds between API calls.
469
+ :keyword query_responses: A list of query-response pairs.
470
+ :paramtype query_responses: List[Dict[str, str]]
471
+ :keyword max_conversation_turns: The maximum number of conversation turns.
472
+ :paramtype max_conversation_turns: int
473
+ :keyword tasks: A list of tasks for the simulation.
474
+ :paramtype tasks: List[Dict]
475
+ :keyword user_simulator_prompty: Path to the user simulator prompty file.
476
+ :paramtype user_simulator_prompty: Optional[str]
477
+ :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
478
+ :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
479
+ :keyword target: The target function to call for responses.
480
+ :paramtype target: Callable
481
+ :keyword api_call_delay_sec: Delay in seconds between API calls.
482
+ :paramtype api_call_delay_sec: float
416
483
  :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
417
484
  :rtype: List[JsonLineChatProtocol]
418
485
  """
419
486
  total_turns = len(query_responses) * max_conversation_turns
420
487
 
421
488
  progress_bar = tqdm(
422
- total=int(total_turns/2),
489
+ total=int(total_turns / 2),
423
490
  desc="Generating: ",
424
491
  ncols=100,
425
492
  unit="message",
@@ -466,7 +533,7 @@ class Simulator:
466
533
  task: str,
467
534
  user_simulator_prompty: Optional[str],
468
535
  user_simulator_prompty_kwargs: Dict[str, Any],
469
- target: callable,
536
+ target: Callable,
470
537
  api_call_delay_sec: float,
471
538
  progress_bar: tqdm,
472
539
  ) -> List[Dict[str, str]]:
@@ -484,7 +551,7 @@ class Simulator:
484
551
  :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
485
552
  :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
486
553
  :keyword target: The target function to call for responses.
487
- :paramtype target: callable
554
+ :paramtype target: Callable
488
555
  :keyword api_call_delay_sec: Delay in seconds between API calls.
489
556
  :paramtype api_call_delay_sec: float
490
557
  :keyword progress_bar: Progress bar for tracking simulation progress.
@@ -496,20 +563,23 @@ class Simulator:
496
563
  # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
497
564
  # conversation_history.add_to_history(user_turn)
498
565
 
499
- while conversation_history.get_length() < max_conversation_turns:
566
+ while len(conversation_history) < max_conversation_turns:
500
567
  user_flow = self._load_user_simulation_flow(
501
568
  user_simulator_prompty=user_simulator_prompty,
502
569
  prompty_model_config=self._build_prompty_model_config(),
503
570
  user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
504
571
  )
505
572
  conversation_starter_from_simulated_user = user_flow(
506
- task=task, conversation_history=[{
507
- "role": "assistant",
508
- "content": conversation_starter,
509
- "your_task": "Act as the user and translate the content into a user query."
510
- }]
573
+ task=task,
574
+ conversation_history=[
575
+ {
576
+ "role": "assistant",
577
+ "content": conversation_starter,
578
+ "your_task": "Act as the user and translate the content into a user query.",
579
+ }
580
+ ],
511
581
  )
512
- if type(conversation_starter_from_simulated_user) == dict:
582
+ if isinstance(conversation_starter_from_simulated_user, dict):
513
583
  conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
514
584
  user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
515
585
  conversation_history.add_to_history(user_turn)
@@ -520,7 +590,7 @@ class Simulator:
520
590
  conversation_history.add_to_history(assistant_turn)
521
591
  progress_bar.update(1)
522
592
 
523
- if conversation_history.get_length() >= max_conversation_turns:
593
+ if len(conversation_history) >= max_conversation_turns:
524
594
  break
525
595
 
526
596
  return conversation_history.to_list()
@@ -536,9 +606,13 @@ class Simulator:
536
606
  Builds a response from the user simulator based on the current conversation history.
537
607
 
538
608
  :param task: A string representing the task details.
609
+ :type task: str
539
610
  :param conversation_history: The current conversation history as a list of dictionaries.
611
+ :type conversation_history: List[Dict[str, Any]]
540
612
  :param user_simulator_prompty: Path to the user simulator prompty file.
613
+ :type user_simulator_prompty: Optional[str]
541
614
  :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
615
+ :type user_simulator_prompty_kwargs: Dict[str, Any]
542
616
  :return: The generated response content from the user simulator.
543
617
  :rtype: str
544
618
  :raises RuntimeError: If an error occurs during response generation.
@@ -548,23 +622,27 @@ class Simulator:
548
622
  prompty_model_config=self._build_prompty_model_config(),
549
623
  user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
550
624
  )
551
-
552
625
  try:
553
- response_content = user_flow(task=task, conversation_history=conversation_history)
626
+ response_content = user_flow(
627
+ task=task, conversation_history=conversation_history, **user_simulator_prompty_kwargs
628
+ )
554
629
  user_response = self._parse_prompty_response(response=response_content)
555
630
  return user_response["content"]
556
631
  except Exception as e:
557
632
  raise RuntimeError("Error building user simulation response") from e
558
633
 
559
634
  async def _get_target_response(
560
- self, *, target: callable, api_call_delay_sec: float, conversation_history: ConversationHistory
635
+ self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
561
636
  ) -> str:
562
637
  """
563
638
  Retrieves the response from the target callback based on the current conversation history.
564
639
 
565
- :param target: The target function to call for a response.
566
- :param api_call_delay_sec: Delay in seconds before retrieving the response.
567
- :param conversation_history: The current conversation history.
640
+ :keyword target: The target function to call for a response.
641
+ :paramtype target: Callable
642
+ :keyword api_call_delay_sec: Delay in seconds before retrieving the response.
643
+ :paramtype api_call_delay_sec: float
644
+ :keyword conversation_history: The current conversation history.
645
+ :paramtype conversation_history: ConversationHistory
568
646
  :return: The content of the response from the target.
569
647
  :rtype: str
570
648
  """