azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation has been flagged as possibly problematic.

Files changed (78)
  1. azure/ai/evaluation/__init__.py +9 -5
  2. azure/ai/evaluation/_common/constants.py +4 -2
  3. azure/ai/evaluation/_common/math.py +18 -0
  4. azure/ai/evaluation/_common/rai_service.py +54 -62
  5. azure/ai/evaluation/_common/utils.py +201 -16
  6. azure/ai/evaluation/_constants.py +12 -0
  7. azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
  8. azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
  9. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
  10. azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
  11. azure/ai/evaluation/_evaluate/_evaluate.py +161 -89
  12. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
  13. azure/ai/evaluation/_evaluate/_utils.py +44 -25
  14. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +33 -79
  15. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
  16. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  17. azure/ai/evaluation/_evaluators/_common/_base_eval.py +331 -0
  18. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +76 -0
  19. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +97 -0
  20. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  21. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
  28. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
  29. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +30 -74
  30. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
  31. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +34 -80
  32. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
  33. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
  34. azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
  35. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +35 -83
  36. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
  37. azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
  38. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +25 -28
  39. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
  40. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
  41. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +23 -17
  42. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  43. azure/ai/evaluation/_evaluators/_xpia/xpia.py +15 -90
  44. azure/ai/evaluation/_exceptions.py +9 -7
  45. azure/ai/evaluation/_http_utils.py +203 -132
  46. azure/ai/evaluation/_model_configurations.py +37 -9
  47. azure/ai/evaluation/{_evaluators/_chat/retrieval → _vendor}/__init__.py +0 -6
  48. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  49. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  50. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  51. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  52. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  53. azure/ai/evaluation/_version.py +1 -1
  54. azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
  55. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
  56. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
  57. azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
  58. azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
  59. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
  60. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
  61. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
  62. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
  63. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
  64. azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
  65. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -6
  66. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
  67. azure/ai/evaluation/simulator/_simulator.py +127 -117
  68. azure/ai/evaluation/simulator/_tracing.py +4 -4
  69. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/METADATA +129 -43
  70. azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt +50 -0
  71. azure_ai_evaluation-1.0.0b4.dist-info/RECORD +106 -0
  72. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  73. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  74. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  75. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  76. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  77. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/WHEEL +0 -0
  78. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b4.dist-info}/top_level.txt +0 -0
--- a/azure/ai/evaluation/simulator/_simulator.py
+++ b/azure/ai/evaluation/simulator/_simulator.py
@@ -1,25 +1,24 @@
 # flake8: noqa
-# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611
+# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0114,R0913,E0702,R0903,C0411
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import asyncio
 import importlib.resources as pkg_resources
+from tqdm import tqdm
 import json
 import os
 import re
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Union
+from promptflow.core import AsyncPrompty
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
+from azure.ai.evaluation._common.utils import construct_prompty_model_config
 
-from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration, Flow
-from tqdm import tqdm
-
+from .._exceptions import ErrorBlame, ErrorCategory, EvaluationException
 from .._user_agent import USER_AGENT
 from ._conversation.constants import ConversationRole
 from ._helpers import ConversationHistory, Turn, experimental
-
-# from ._tracing import monitor_task_simulator
 from ._utils import JsonLineChatProtocol
 
 
@@ -29,43 +28,60 @@ class Simulator:
     Simulator for generating synthetic conversations.
     """
 
-    def __init__(self, azure_ai_project: Dict[str, Any], credential: Optional[Any] = None):
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
         """
-        Initializes the task simulator with a project scope.
+        Initializes the task simulator with the model configuration.
 
-        :param azure_ai_project: A dictionary defining the scope of the project, including keys such as
-            "subscription_id", "resource_group_name", and "project_name".
-        :param credential: Azure credentials to authenticate the user. If None, the default credentials are used.
-        :paramtype credential: Optional[Any]
-        :raises ValueError: If the azure_ai_project does not contain the required keys or any value is None.
+        :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration.
+        :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration]
+        :raises ValueError: If the model_config does not contain the required keys or any value is None.
         """
-        self._validate_project_config(azure_ai_project)
-        self.azure_ai_project = azure_ai_project
-        self.azure_ai_project["api_version"] = "2024-02-15-preview"
-        self.credential = credential
+        self._validate_model_config(model_config)
+        self.model_config = model_config
+        if "api_version" not in self.model_config:
+            self.model_config["api_version"] = "2024-06-01"  # type: ignore
 
     @staticmethod
-    def _validate_project_config(azure_ai_project: Dict[str, Any]):
+    def _validate_model_config(model_config: Any):
         """
-        Validates the azure_ai_project configuration to ensure all required keys are present and have non-None values.
+        Validates the model_config to ensure all required keys are present and have non-None values.
+        If 'type' is not specified, it will attempt to infer the type based on the keys present.
 
-        :param azure_ai_project: The Azure AI project configuration dictionary.
-        :type azure_ai_project: Dict[str, Any]
+        :param model_config: The model configuration dictionary.
+        :type model_config: Dict[str, Any]
         :raises ValueError: If required keys are missing or any of the values are None.
         """
-        required_keys = ["subscription_id", "resource_group_name", "project_name"]
-        if not all(key in azure_ai_project for key in required_keys):
-            raise ValueError(f"azure_ai_project must contain keys: {', '.join(required_keys)}")
-        if not all(azure_ai_project[key] for key in required_keys):
-            raise ValueError("subscription_id, resource_group_name, and project_name must not be None")
+        # Attempt to infer 'type' if not provided
+        if "type" not in model_config:
+            if "azure_deployment" in model_config and "azure_endpoint" in model_config:
+                model_config["type"] = "azure_openai"
+            elif "model" in model_config:
+                model_config["type"] = "openai"
+            else:
+                raise ValueError(
+                    "Unable to infer 'type' from model_config. Please specify 'type' as 'azure_openai' or 'openai'."
+                )
+
+        if model_config["type"] == "azure_openai":
+            required_keys = ["azure_deployment", "azure_endpoint"]
+        elif model_config["type"] == "openai":
+            required_keys = ["api_key", "model"]
+        else:
+            raise ValueError("model_config 'type' must be 'azure_openai' or 'openai'.")
+
+        missing_keys = [key for key in required_keys if key not in model_config]
+        if missing_keys:
+            raise ValueError(f"model_config is missing required keys: {', '.join(missing_keys)}")
+        none_keys = [key for key in required_keys if model_config.get(key) is None]
+        if none_keys:
+            raise ValueError(f"The following keys in model_config must not be None: {', '.join(none_keys)}")
 
-    # @monitor_task_simulator
     async def __call__(
         self,
         *,
         target: Callable,
         max_conversation_turns: int = 5,
-        tasks: List[Dict] = [],
+        tasks: List[str] = [],
         text: str = "",
         num_queries: int = 5,
         query_response_generating_prompty: Optional[str] = None,
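
Net effect of the constructor hunks above: the simulator is now configured with an LLM model configuration rather than an Azure AI project scope, and "type" can be inferred from the keys supplied. A minimal usage sketch, assuming the validation rules shown in _validate_model_config (endpoint and deployment values here are hypothetical):

    from azure.ai.evaluation.simulator import Simulator

    model_config = {
        # "type": "azure_openai" is inferred because both keys below are present
        "azure_endpoint": "https://my-resource.openai.azure.com",  # hypothetical endpoint
        "azure_deployment": "my-gpt-deployment",                   # hypothetical deployment
    }
    simulator = Simulator(model_config=model_config)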
@@ -109,7 +125,7 @@
 
         Modes:
         - Task-Free Mode: When only num_queries is specified and tasks is not, the method generates num_queries x max_conversation_turns lines of simulated data grounded in the context of the text.
-        - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines are simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used.
+        - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines will be simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used.
         - Conversation Starter Mode: When conversation_turns are specified, the method starts each conversation with the user-specified queries and then follows the conversation history for the remaining turns.
         """
         if conversation_turns and (text or tasks):
@@ -128,8 +144,7 @@
             num_queries = min(num_queries, len(tasks))
         max_conversation_turns *= 2  # account for both user and assistant turns
 
-        prompty_model_config = self._build_prompty_model_config()
-
+        prompty_model_config = self.model_config
         if conversation_turns:
             return await self._simulate_with_predefined_turns(
                 target=target,
@@ -149,7 +164,6 @@
             prompty_model_config=prompty_model_config,
             **kwargs,
         )
-
         return await self._create_conversations_from_query_responses(
             query_responses=query_responses,
             max_conversation_turns=max_conversation_turns,
@@ -160,18 +174,6 @@
             api_call_delay_sec=api_call_delay_sec,
         )
 
-    def _build_prompty_model_config(self) -> Dict[str, Any]:
-        """
-        Constructs the configuration for the prompty model.
-
-        :return: A dictionary containing the prompty model configuration, including API version and user agent headers if applicable.
-        :rtype: Dict[str, Any]
-        """
-        config = {"configuration": self.azure_ai_project}
-        if USER_AGENT and isinstance(self.azure_ai_project, AzureOpenAIModelConfiguration):
-            config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}})
-        return config
-
     async def _simulate_with_predefined_turns(
         self,
         *,
@@ -181,7 +183,7 @@
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
         api_call_delay_sec: float,
-        prompty_model_config: Dict[str, Any],
+        prompty_model_config: Any,
     ) -> List[JsonLineChatProtocol]:
         """
         Simulates conversations using predefined conversation turns.
@@ -199,7 +201,7 @@
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
         :keyword prompty_model_config: The configuration for the prompty model.
-        :paramtype prompty_model_config: Dict[str, Any]
+        :paramtype prompty_model_config: Any
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
@@ -234,8 +236,16 @@
                 target=target,
                 progress_bar=progress_bar,
             )
-
-            simulated_conversations.append(current_simulation.to_list())
+            simulated_conversations.append(
+                JsonLineChatProtocol(
+                    {
+                        "messages": current_simulation.to_list(),
+                        "finish_reason": ["stop"],
+                        "context": {},
+                        "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+                    }
+                )
+            )
 
         progress_bar.close()
         return simulated_conversations
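
With this hunk, each simulated conversation is appended as a JsonLineChatProtocol record rather than a bare list of turns. Serialized, a record now has roughly the shape below (the message contents are illustrative, not from the source):

    record = {
        "messages": [
            {"role": "user", "content": "..."},       # turns from current_simulation.to_list()
            {"role": "assistant", "content": "..."},  # illustrative
        ],
        "finish_reason": ["stop"],
        "context": {},
        "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
    }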
@@ -273,14 +283,16 @@
         :paramtype progress_bar: tqdm,
         """
         user_flow = self._load_user_simulation_flow(
-            user_simulator_prompty=user_simulator_prompty,
+            user_simulator_prompty=user_simulator_prompty,  # type: ignore
             prompty_model_config=prompty_model_config,
             user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
         )
 
         while len(current_simulation) < max_conversation_turns:
-            user_response_content = user_flow(
-                task="Continue the conversation", conversation_history=current_simulation.to_list()
+            user_response_content = await user_flow(
+                task="Continue the conversation",
+                conversation_history=current_simulation.to_list(),
+                **user_simulator_prompty_kwargs,
             )
             user_response = self._parse_prompty_response(response=user_response_content)
             user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
@@ -296,43 +308,60 @@
     def _load_user_simulation_flow(
         self,
         *,
-        user_simulator_prompty: Union[str, os.PathLike],
+        user_simulator_prompty: Optional[Union[str, os.PathLike]],
         prompty_model_config: Dict[str, Any],
         user_simulator_prompty_kwargs: Dict[str, Any],
-    ) -> Flow:
+    ) -> "AsyncPrompty":  # type: ignore
         """
         Loads the flow for simulating user interactions.
 
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
-        :paramtype user_simulator_prompty: Union[str, os.PathLike]
+        :paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]]
         :keyword prompty_model_config: The configuration for the prompty model.
         :paramtype prompty_model_config: Dict[str, Any]
         :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
         :paramtype user_simulator_prompty_kwargs: Dict[str, Any]
         :return: The loaded flow for simulating user interactions.
-        :rtype: Flow
+        :rtype: AsyncPrompty
         """
         if not user_simulator_prompty:
             package = "azure.ai.evaluation.simulator._prompty"
             resource_name = "task_simulate.prompty"
             try:
                 # Access the resource as a file path
+                # pylint: disable=deprecated-method
                 with pkg_resources.path(package, resource_name) as prompty_path:
-                    return load_flow(source=str(prompty_path), model=prompty_model_config)
+                    prompty_model_config = construct_prompty_model_config(
+                        model_config=prompty_model_config,  # type: ignore
+                        default_api_version="2024-06-01",
+                        user_agent=USER_AGENT,
+                    )
+                    return AsyncPrompty.load(source=prompty_path, model=prompty_model_config)  # type: ignore
             except FileNotFoundError as e:
-                raise f"Flow path for {resource_name} does not exist in package {package}." from e
-        return load_flow(
+                msg = f"Flow path for {resource_name} does not exist in package {package}."
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
+                    blame=ErrorBlame.USER_ERROR,
+                ) from e
+        prompty_model_config = construct_prompty_model_config(
+            model_config=prompty_model_config,  # type: ignore
+            default_api_version="2024-06-01",
+            user_agent=USER_AGENT,
+        )
+        return AsyncPrompty.load(
             source=user_simulator_prompty,
             model=prompty_model_config,
             **user_simulator_prompty_kwargs,
-        )
+        )  # type: ignore
 
     def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]:
         """
         Parses the response from the prompty execution.
 
         :keyword response: The raw response from the prompty.
-        :paramtype str: str
+        :paramtype response: str
         :return: A dictionary representing the parsed response content.
         :rtype: Dict[str, Any]
         :raises ValueError: If the response cannot be parsed.
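
This rewrite (and the analogous _load_query_generation_flow rewrite below) swaps promptflow's synchronous load_flow/Flow for AsyncPrompty, which changes the calling convention at every call site. A sketch of the before/after, assuming async calling code (the path and arguments are illustrative):

    # before (1.0.0b2): synchronous Flow
    # flow = load_flow(source=prompty_path, model=prompty_model_config)
    # response = flow(task="...", conversation_history=[])

    # after (1.0.0b4): AsyncPrompty, which must be awaited
    flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)
    response = await flow(task="...", conversation_history=[])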
@@ -373,7 +402,7 @@
         num_queries: int,
         query_response_generating_prompty: Optional[str],
         query_response_generating_prompty_kwargs: Dict[str, Any],
-        prompty_model_config: Dict[str, Any],
+        prompty_model_config: Any,
         **kwargs,
     ) -> List[Dict[str, str]]:
         """
@@ -388,19 +417,18 @@
         :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty.
         :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
         :keyword prompty_model_config: The configuration for the prompty model.
-        :paramtype prompty_model_config: Dict[str, Any]
+        :paramtype prompty_model_config: Any
         :return: A list of query-response dictionaries.
         :rtype: List[Dict[str, str]]
         :raises RuntimeError: If an error occurs during query generation.
         """
         query_flow = self._load_query_generation_flow(
-            query_response_generating_prompty=query_response_generating_prompty,
+            query_response_generating_prompty=query_response_generating_prompty,  # type: ignore
             prompty_model_config=prompty_model_config,
             query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
         )
-
         try:
-            query_responses = query_flow(text=text, num_queries=num_queries)
+            query_responses = await query_flow(text=text, num_queries=num_queries)
             if isinstance(query_responses, dict):
                 keys = list(query_responses.keys())
                 return query_responses[keys[0]]
@@ -411,43 +439,60 @@
     def _load_query_generation_flow(
         self,
         *,
-        query_response_generating_prompty: Union[str, os.PathLike],
+        query_response_generating_prompty: Optional[Union[str, os.PathLike]],
         prompty_model_config: Dict[str, Any],
         query_response_generating_prompty_kwargs: Dict[str, Any],
-    ) -> Flow:
+    ) -> "AsyncPrompty":
         """
         Loads the flow for generating query responses.
 
         :keyword query_response_generating_prompty: Path to the query response generating prompty file.
-        :paramtype query_response_generating_prompty: Union[str, os.PathLike]
+        :paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]]
         :keyword prompty_model_config: The configuration for the prompty model.
         :paramtype prompty_model_config: Dict[str, Any]
         :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow.
         :paramtype query_response_generating_prompty_kwargs: Dict[str, Any]
         :return: The loaded flow for generating query responses.
-        :rtype: Flow
+        :rtype: AsyncPrompty
         """
         if not query_response_generating_prompty:
             package = "azure.ai.evaluation.simulator._prompty"
             resource_name = "task_query_response.prompty"
             try:
                 # Access the resource as a file path
+                # pylint: disable=deprecated-method
                 with pkg_resources.path(package, resource_name) as prompty_path:
-                    return load_flow(source=str(prompty_path), model=prompty_model_config)
+                    prompty_model_config = construct_prompty_model_config(
+                        model_config=prompty_model_config,  # type: ignore
+                        default_api_version="2024-06-01",
+                        user_agent=USER_AGENT,
+                    )
+                    return AsyncPrompty.load(source=prompty_path, model=prompty_model_config)  # type: ignore
             except FileNotFoundError as e:
-                raise f"Flow path for {resource_name} does not exist in package {package}." from e
-        return load_flow(
+                msg = f"Flow path for {resource_name} does not exist in package {package}."
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
+                    blame=ErrorBlame.USER_ERROR,
+                ) from e
+        prompty_model_config = construct_prompty_model_config(
+            model_config=prompty_model_config,  # type: ignore
+            default_api_version="2024-06-01",
+            user_agent=USER_AGENT,
+        )
+        return AsyncPrompty.load(
             source=query_response_generating_prompty,
             model=prompty_model_config,
             **query_response_generating_prompty_kwargs,
-        )
+        )  # type: ignore
 
     async def _create_conversations_from_query_responses(
         self,
         *,
         query_responses: List[Dict[str, str]],
         max_conversation_turns: int,
-        tasks: List[Dict],
+        tasks: List[str],
         user_simulator_prompty: Optional[str],
         user_simulator_prompty_kwargs: Dict[str, Any],
         target: Callable,
@@ -461,7 +506,7 @@
         :keyword max_conversation_turns: The maximum number of conversation turns.
         :paramtype max_conversation_turns: int
         :keyword tasks: A list of tasks for the simulation.
-        :paramtype tasks: List[Dict]
+        :paramtype tasks: List[str]
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
         :paramtype user_simulator_prompty: Optional[str]
         :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
@@ -491,7 +536,7 @@
             conversation = await self._complete_conversation(
                 conversation_starter=query,
                 max_conversation_turns=max_conversation_turns,
-                task=task,
+                task=task,  # type: ignore
                 user_simulator_prompty=user_simulator_prompty,
                 user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
                 target=target,
@@ -526,7 +571,7 @@
         target: Callable,
         api_call_delay_sec: float,
         progress_bar: tqdm,
-    ) -> List[Dict[str, str]]:
+    ) -> List[Dict[str, Optional[str]]]:
         """
         Completes a conversation with the target model based on the conversation starter.
 
@@ -547,7 +592,7 @@
         :keyword progress_bar: Progress bar for tracking simulation progress.
         :paramtype progress_bar: tqdm
         :return: A list representing the conversation history with each turn's content.
-        :rtype: List[Dict[str, str]]
+        :rtype: List[Dict[str, Optional[str]]]
         """
         conversation_history = ConversationHistory()
         # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
@@ -555,11 +600,11 @@
 
         while len(conversation_history) < max_conversation_turns:
             user_flow = self._load_user_simulation_flow(
-                user_simulator_prompty=user_simulator_prompty,
-                prompty_model_config=self._build_prompty_model_config(),
+                user_simulator_prompty=user_simulator_prompty,  # type: ignore
+                prompty_model_config=self.model_config,  # type: ignore
                 user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
             )
-            conversation_starter_from_simulated_user = user_flow(
+            conversation_starter_from_simulated_user = await user_flow(
                 task=task,
                 conversation_history=[
                     {
@@ -585,41 +630,6 @@
 
         return conversation_history.to_list()
 
-    async def _build_user_simulation_response(
-        self,
-        task: str,
-        conversation_history: List[Dict[str, Any]],
-        user_simulator_prompty: Optional[str],
-        user_simulator_prompty_kwargs: Dict[str, Any],
-    ) -> str:
-        """
-        Builds a response from the user simulator based on the current conversation history.
-
-        :param task: A string representing the task details.
-        :type task: str
-        :param conversation_history: The current conversation history as a list of dictionaries.
-        :type conversation_history: List[Dict[str, Any]]
-        :param user_simulator_prompty: Path to the user simulator prompty file.
-        :type user_simulator_prompty: Optional[str]
-        :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
-        :type user_simulator_prompty_kwargs: Dict[str, Any]
-        :return: The generated response content from the user simulator.
-        :rtype: str
-        :raises RuntimeError: If an error occurs during response generation.
-        """
-        user_flow = self._load_user_simulation_flow(
-            user_simulator_prompty=user_simulator_prompty,
-            prompty_model_config=self._build_prompty_model_config(),
-            user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
-        )
-
-        try:
-            response_content = user_flow(task=task, conversation_history=conversation_history)
-            user_response = self._parse_prompty_response(response=response_content)
-            return user_response["content"]
-        except Exception as e:
-            raise RuntimeError("Error building user simulation response") from e
-
     async def _get_target_response(
         self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
     ) -> str:
--- a/azure/ai/evaluation/simulator/_tracing.py
+++ b/azure/ai/evaluation/simulator/_tracing.py
@@ -69,16 +69,16 @@ def monitor_task_simulator(func: Callable[P, R]) -> Callable[P, R]:
 
     @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
-        text_length = len(kwargs.get("text", ""))
-        user_persona_length = len(kwargs.get("user_persona", []))
+        text = kwargs.get("text")
+        user_persona = kwargs.get("user_persona")
         num_queries = kwargs.get("num_queries", 0)
         max_conversation_turns = kwargs.get("max_conversation_turns", 0)
         decorated_func = monitor_operation(
             activity_name="task.simulator.call",
             activity_type=ActivityType.PUBLICAPI,
             custom_dimensions={
-                "text_length": text_length,
-                "user_persona_length": user_persona_length,
+                "text_length": len(text) if isinstance(text, str) else 0,
+                "user_persona_length": len(user_persona) if isinstance(user_persona, list) else 0,
                 "number_of_queries": num_queries,
                 "max_conversation_turns": max_conversation_turns,
             },
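
This tracing hunk hardens the telemetry dimensions against callers passing None (or other non-string/non-list values) for text and user_persona. A minimal illustration of the old failure mode, with hypothetical values:

    kwargs = {"text": None}
    # old: the key exists, so the default "" is never used and len(None) raises TypeError
    #     text_length = len(kwargs.get("text", ""))
    # new: anything that is not a str falls back to 0
    text = kwargs.get("text")
    text_length = len(text) if isinstance(text, str) else 0  # -> 0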