azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.0.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (43) hide show
  1. azure/ai/evaluation/__init__.py +9 -5
  2. azure/ai/evaluation/_common/utils.py +24 -9
  3. azure/ai/evaluation/_constants.py +4 -0
  4. azure/ai/evaluation/_evaluate/_evaluate.py +57 -39
  5. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +34 -81
  6. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
  7. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  8. azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
  9. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
  10. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
  11. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  12. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
  13. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
  14. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
  15. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
  16. azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
  17. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +29 -74
  18. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
  19. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +33 -80
  20. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
  21. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
  22. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +34 -83
  23. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
  24. azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
  25. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +16 -22
  26. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
  27. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -11
  28. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  29. azure/ai/evaluation/_evaluators/_xpia/xpia.py +16 -90
  30. azure/ai/evaluation/_exceptions.py +0 -1
  31. azure/ai/evaluation/_model_configurations.py +36 -8
  32. azure/ai/evaluation/_version.py +1 -1
  33. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
  34. azure/ai/evaluation/simulator/_simulator.py +19 -8
  35. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/METADATA +59 -1
  36. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/RECORD +38 -39
  37. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  38. azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
  39. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  40. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  41. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  42. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL +0 -0
  43. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt +0 -0
@@ -3,78 +3,13 @@
3
3
  # ---------------------------------------------------------
4
4
 
5
5
  import os
6
- import re
6
+ from typing import Optional
7
+ from typing_extensions import override
7
8
 
8
- import numpy as np
9
- from promptflow._utils.async_utils import async_run_allowing_running_loop
10
- from promptflow.core import AsyncPrompty
9
+ from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase
11
10
 
12
- from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
13
11
 
14
- from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config
15
-
16
- try:
17
- from ..._user_agent import USER_AGENT
18
- except ImportError:
19
- USER_AGENT = None
20
-
21
-
22
- class _AsyncRelevanceEvaluator:
23
- # Constants must be defined within eval's directory to be save/loadable
24
- PROMPTY_FILE = "relevance.prompty"
25
- LLM_CALL_TIMEOUT = 600
26
- DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
27
-
28
- def __init__(self, model_config: dict):
29
- ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
30
-
31
- prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
32
-
33
- # Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
34
- # https://github.com/encode/httpx/discussions/2959
35
- prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
36
-
37
- ensure_user_agent_in_aoai_model_config(
38
- model_config,
39
- prompty_model_config,
40
- USER_AGENT,
41
- )
42
-
43
- current_dir = os.path.dirname(__file__)
44
- prompty_path = os.path.join(current_dir, self.PROMPTY_FILE)
45
- self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)
46
-
47
- async def __call__(self, *, query: str, response: str, context: str, **kwargs):
48
- # Validate input parameters
49
- query = str(query or "")
50
- response = str(response or "")
51
- context = str(context or "")
52
-
53
- if not (query.strip() and response.strip() and context.strip()):
54
- msg = "'query', 'response' and 'context' must be non-empty strings."
55
- raise EvaluationException(
56
- message=msg,
57
- internal_message=msg,
58
- error_category=ErrorCategory.MISSING_FIELD,
59
- error_blame=ErrorBlame.USER_ERROR,
60
- error_target=ErrorTarget.RELEVANCE_EVALUATOR,
61
- )
62
-
63
- # Run the evaluation flow
64
- llm_output = await self._flow(
65
- query=query, response=response, context=context, timeout=self.LLM_CALL_TIMEOUT, **kwargs
66
- )
67
-
68
- score = np.nan
69
- if llm_output:
70
- match = re.search(r"\d", llm_output)
71
- if match:
72
- score = float(match.group())
73
-
74
- return {"gpt_relevance": float(score)}
75
-
76
-
77
- class RelevanceEvaluator:
12
+ class RelevanceEvaluator(PromptyEvaluatorBase):
78
13
  """
79
14
  Initialize a relevance evaluator configured for a specific Azure OpenAI model.
80
15
 
@@ -102,25 +37,41 @@ class RelevanceEvaluator:
102
37
  }
103
38
  """
104
39
 
105
- def __init__(self, model_config: dict):
106
- self._async_evaluator = _AsyncRelevanceEvaluator(model_config)
40
+ # Constants must be defined within eval's directory to be save/loadable
41
+ PROMPTY_FILE = "relevance.prompty"
42
+ RESULT_KEY = "gpt_relevance"
107
43
 
108
- def __call__(self, *, query: str, response: str, context: str, **kwargs):
109
- """
110
- Evaluate relevance.
44
+ @override
45
+ def __init__(self, model_config: dict):
46
+ current_dir = os.path.dirname(__file__)
47
+ prompty_path = os.path.join(current_dir, self.PROMPTY_FILE)
48
+ super().__init__(model_config=model_config, prompty_file=prompty_path, result_key=self.RESULT_KEY)
49
+
50
+ @override
51
+ def __call__(
52
+ self,
53
+ *,
54
+ query: Optional[str] = None,
55
+ response: Optional[str] = None,
56
+ context: Optional[str] = None,
57
+ conversation: Optional[dict] = None,
58
+ **kwargs
59
+ ):
60
+ """Evaluate relevance. Accepts either a response and context a single evaluation,
61
+ or a conversation for a multi-turn evaluation. If the conversation has more than one turn,
62
+ the evaluator will aggregate the results of each turn.
111
63
 
112
64
  :keyword query: The query to be evaluated.
113
- :paramtype query: str
65
+ :paramtype query: Optional[str]
114
66
  :keyword response: The response to be evaluated.
115
- :paramtype response: str
67
+ :paramtype response: Optional[str]
116
68
  :keyword context: The context to be evaluated.
117
- :paramtype context: str
69
+ :paramtype context: Optional[str]
70
+ :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
71
+ key "messages", and potentially a global context under the key "context". Conversation turns are expected
72
+ to be dictionaries with keys "content", "role", and possibly "context".
73
+ :paramtype conversation: Optional[Dict]
118
74
  :return: The relevance score.
119
75
  :rtype: dict
120
76
  """
121
- return async_run_allowing_running_loop(
122
- self._async_evaluator, query=query, response=response, context=context, **kwargs
123
- )
124
-
125
- def _to_async(self):
126
- return self._async_evaluator
77
+ return super().__call__(query=query, response=response, context=context, conversation=conversation, **kwargs)
@@ -3,11 +3,6 @@ name: Relevance
3
3
  description: Evaluates relevance score for QA scenario
4
4
  model:
5
5
  api: chat
6
- configuration:
7
- type: azure_openai
8
- azure_deployment: ${env:AZURE_DEPLOYMENT}
9
- api_key: ${env:AZURE_OPENAI_API_KEY}
10
- azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
11
6
  parameters:
12
7
  temperature: 0.0
13
8
  max_tokens: 1
@@ -2,8 +2,8 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- from ._chat import ChatEvaluator
5
+ from ._retrieval import RetrievalEvaluator
6
6
 
7
7
  __all__ = [
8
- "ChatEvaluator",
8
+ "RetrievalEvaluator",
9
9
  ]
@@ -11,34 +11,27 @@ import numpy as np
11
11
  from promptflow._utils.async_utils import async_run_allowing_running_loop
12
12
  from promptflow.core import AsyncPrompty
13
13
 
14
- from ...._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config
14
+
15
+ from ..._common.utils import construct_prompty_model_config
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
18
19
  try:
19
- from ...._user_agent import USER_AGENT
20
+ from .._user_agent import USER_AGENT
20
21
  except ImportError:
21
22
  USER_AGENT = None
22
23
 
23
24
 
24
- class _AsyncRetrievalChatEvaluator:
25
+ class _AsyncRetrievalScoreEvaluator:
25
26
  # Constants must be defined within eval's directory to be save/loadable
26
27
  PROMPTY_FILE = "retrieval.prompty"
27
28
  LLM_CALL_TIMEOUT = 600
28
29
  DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
29
30
 
30
31
  def __init__(self, model_config: dict):
31
- ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
32
-
33
- prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
34
-
35
- # Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
36
- # https://github.com/encode/httpx/discussions/2959
37
- prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
38
-
39
- ensure_user_agent_in_aoai_model_config(
32
+ prompty_model_config = construct_prompty_model_config(
40
33
  model_config,
41
- prompty_model_config,
34
+ self.DEFAULT_OPEN_API_VERSION,
42
35
  USER_AGENT,
43
36
  )
44
37
 
@@ -101,7 +94,7 @@ class _AsyncRetrievalChatEvaluator:
101
94
  }
102
95
 
103
96
 
104
- class RetrievalChatEvaluator:
97
+ class RetrievalEvaluator:
105
98
  """
106
99
  Initialize an evaluator configured for a specific Azure OpenAI model.
107
100
 
@@ -110,11 +103,12 @@ class RetrievalChatEvaluator:
110
103
  ~azure.ai.evaluation.OpenAIModelConfiguration]
111
104
  :return: A function that evaluates and generates metrics for "chat" scenario.
112
105
  :rtype: Callable
106
+
113
107
  **Usage**
114
108
 
115
109
  .. code-block:: python
116
110
 
117
- chat_eval = RetrievalChatEvaluator(model_config)
111
+ chat_eval = RetrievalScoreEvaluator(model_config)
118
112
  conversation = [
119
113
  {"role": "user", "content": "What is the value of 2 + 2?"},
120
114
  {"role": "assistant", "content": "2 + 2 = 4", "context": {
@@ -130,18 +124,18 @@ class RetrievalChatEvaluator:
130
124
 
131
125
  .. code-block:: python
132
126
 
133
- {
134
- "gpt_retrieval": 3.0
135
- "evaluation_per_turn": {
136
- "gpt_retrieval": {
137
- "score": [1.0, 2.0, 3.0]
127
+ {
128
+ "gpt_retrieval": 3.0
129
+ "evaluation_per_turn": {
130
+ "gpt_retrieval": {
131
+ "score": [1.0, 2.0, 3.0]
132
+ }
138
133
  }
139
134
  }
140
- }
141
135
  """
142
136
 
143
137
  def __init__(self, model_config: dict):
144
- self._async_evaluator = _AsyncRetrievalChatEvaluator(model_config)
138
+ self._async_evaluator = _AsyncRetrievalScoreEvaluator(model_config)
145
139
 
146
140
  def __call__(self, *, conversation, **kwargs):
147
141
  """Evaluates retrieval score chat scenario.
@@ -3,11 +3,6 @@ name: Retrieval
3
3
  description: Evaluates retrieval score for Chat scenario
4
4
  model:
5
5
  api: chat
6
- configuration:
7
- type: azure_openai
8
- azure_deployment: ${env:AZURE_DEPLOYMENT}
9
- api_key: ${env:AZURE_OPENAI_API_KEY}
10
- azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
11
6
  parameters:
12
7
  temperature: 0.0
13
8
  top_p: 1.0
@@ -11,7 +11,7 @@ from promptflow.core import AsyncPrompty
11
11
 
12
12
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
13
13
 
14
- from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config
14
+ from ..._common.utils import construct_prompty_model_config
15
15
 
16
16
  try:
17
17
  from ..._user_agent import USER_AGENT
@@ -26,17 +26,9 @@ class _AsyncSimilarityEvaluator:
26
26
  DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
27
27
 
28
28
  def __init__(self, model_config: dict):
29
- ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
30
-
31
- prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
32
-
33
- # Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
34
- # https://github.com/encode/httpx/discussions/2959
35
- prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
36
-
37
- ensure_user_agent_in_aoai_model_config(
29
+ prompty_model_config = construct_prompty_model_config(
38
30
  model_config,
39
- prompty_model_config,
31
+ self.DEFAULT_OPEN_API_VERSION,
40
32
  USER_AGENT,
41
33
  )
42
34
 
@@ -3,11 +3,6 @@ name: Similarity
3
3
  description: Evaluates similarity score for QA scenario
4
4
  model:
5
5
  api: chat
6
- configuration:
7
- type: azure_openai
8
- azure_deployment: ${env:AZURE_DEPLOYMENT}
9
- api_key: ${env:AZURE_OPENAI_API_KEY}
10
- azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
11
6
  parameters:
12
7
  temperature: 0.0
13
8
  max_tokens: 1
@@ -2,18 +2,17 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
  import logging
5
- from typing import Optional
6
-
7
- from promptflow._utils.async_utils import async_run_allowing_running_loop
8
5
 
6
+ from typing import Optional
7
+ from typing_extensions import override
9
8
  from azure.ai.evaluation._common.constants import EvaluationMetrics
10
- from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
11
- from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
9
+ from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
10
+
12
11
 
13
12
  logger = logging.getLogger(__name__)
14
13
 
15
14
 
16
- class IndirectAttackEvaluator:
15
+ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
17
16
  """A Cross-Domain Prompt Injection Attack (XPIA) jailbreak evaluator.
18
17
 
19
18
  Detect whether cross domain injected attacks are present in your AI system's response.
@@ -25,7 +24,7 @@ class IndirectAttackEvaluator:
25
24
  focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
26
25
  :type eval_last_turn: bool
27
26
  :param credential: The credential for connecting to Azure AI project.
28
- :type credential: ~azure.core.credentials.TokenCredential
27
+ :type credential: Optional[~azure.core.credentials.TokenCredential]
29
28
  :return: A function that evaluates and generates metrics for XPIA chat scenario. Metrics include the overall
30
29
  evaluation label and reason for the Q/A Pair, as well as sub-labels for manipulated content, intrusion, and
31
30
  information.
@@ -51,89 +50,16 @@ class IndirectAttackEvaluator:
51
50
  }
52
51
  """
53
52
 
54
- def __init__(self, azure_ai_project: dict, eval_last_turn: bool = False, credential=None):
55
- self._evaluator = _IndirectAttackEvaluator(azure_ai_project, credential)
56
- self._eval_last_turn = eval_last_turn
57
-
58
- def __call__(
53
+ @override
54
+ def __init__(
59
55
  self,
60
- *,
61
- query: Optional[str],
62
- response: Optional[str],
63
- **kwargs,
56
+ azure_ai_project: dict,
57
+ credential: Optional[dict] = None,
58
+ eval_last_turn: bool = False,
64
59
  ):
65
- """
66
- Evaluates content according to the presence of attacks injected into the conversation context to
67
- interrupt normal expected functionality by eliciting manipulated content, intrusion and attempting
68
- to gather information outside the scope of your AI system.
69
- :keyword query: The query to be evaluated. Mutually exclusive with 'conversation'.
70
- :paramtype query: Optional[str]
71
- :keyword response: The response to be evaluated. Mutually exclusive with 'conversation'.
72
- :paramtype response: Optional[str]
73
- :return: The evaluation scores and reasoning.
74
- :rtype: dict
75
- """
76
-
77
- return self._evaluator(query=query, response=response, **kwargs)
78
-
79
-
80
- class _AsyncIndirectAttackEvaluator:
81
- def __init__(self, azure_ai_project: dict, credential=None):
82
- self._azure_ai_project = azure_ai_project
83
- self._credential = credential
84
-
85
- async def __call__(self, *, query: str, response: str, **kwargs):
86
- """
87
- Evaluates content according to this evaluator's metric.
88
- :keyword query: The query to be evaluated.
89
- :paramtype query: str
90
- :keyword response: The response to be evaluated.
91
- :paramtype response: str
92
- :return: The evaluation score computation based on the metric (self.metric).
93
- :rtype: Any
94
- """
95
- # Validate inputs
96
- # Raises value error if failed, so execution alone signifies success.
97
- if not (query and query.strip() and query != "None") or not (
98
- response and response.strip() and response != "None"
99
- ):
100
- msg = "Both 'query' and 'response' must be non-empty strings."
101
- raise EvaluationException(
102
- message=msg,
103
- internal_message=msg,
104
- error_category=ErrorCategory.MISSING_FIELD,
105
- error_blame=ErrorBlame.USER_ERROR,
106
- error_target=ErrorTarget.INDIRECT_ATTACK_EVALUATOR,
107
- )
108
-
109
- # Run score computation based on supplied metric.
110
- result = await evaluate_with_rai_service(
111
- metric_name=EvaluationMetrics.XPIA,
112
- query=query,
113
- response=response,
114
- project_scope=self._azure_ai_project,
115
- credential=self._credential,
60
+ super().__init__(
61
+ eval_metric=EvaluationMetrics.XPIA,
62
+ azure_ai_project=azure_ai_project,
63
+ credential=credential,
64
+ eval_last_turn=eval_last_turn,
116
65
  )
117
- return result
118
-
119
-
120
- class _IndirectAttackEvaluator:
121
- def __init__(self, azure_ai_project: dict, credential=None):
122
- self._async_evaluator = _AsyncIndirectAttackEvaluator(azure_ai_project, credential)
123
-
124
- def __call__(self, *, query: str, response: str, **kwargs):
125
- """
126
- Evaluates XPIA content.
127
- :keyword query: The query to be evaluated.
128
- :paramtype query: str
129
- :keyword response: The response to be evaluated.
130
- :paramtype response: str
131
- :keyword context: The context to be evaluated.
132
- :paramtype context: str
133
- :return: The XPIA score.
134
- :rtype: dict
135
- """
136
- return async_run_allowing_running_loop(self._async_evaluator, query=query, response=response, **kwargs)
137
-
138
- def _to_async(self):
139
- return self._async_evaluator
@@ -54,7 +54,6 @@ class ErrorTarget(Enum):
54
54
  EVAL_RUN = "EvalRun"
55
55
  CODE_CLIENT = "CodeClient"
56
56
  RAI_CLIENT = "RAIClient"
57
- CHAT_EVALUATOR = "ChatEvaluator"
58
57
  COHERENCE_EVALUATOR = "CoherenceEvaluator"
59
58
  CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
60
59
  ECI_EVALUATOR = "ECIEvaluator"
@@ -2,26 +2,54 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- from typing import TypedDict
5
+ from typing import Dict, Literal, TypedDict
6
6
 
7
+ from typing_extensions import NotRequired
7
8
 
8
- class AzureOpenAIModelConfigurationBase(TypedDict):
9
+
10
+ class AzureOpenAIModelConfiguration(TypedDict, total=False):
11
+ """Model Configuration for Azure OpenAI Model"""
12
+
13
+ type: Literal["azure_openai"]
14
+ """The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration"""
9
15
  azure_deployment: str
16
+ """Name of Azure OpenAI deployment to make request to"""
10
17
  azure_endpoint: str
18
+ """Endpoint of Azure OpenAI resource to make request to"""
11
19
  api_key: str
20
+ """API key of Azure OpenAI resource"""
21
+ api_version: NotRequired[str]
22
+ """(Optional) API version to use in request to Azure OpenAI deployment"""
12
23
 
13
24
 
14
- class AzureOpenAIModelConfiguration(AzureOpenAIModelConfigurationBase, total=False):
15
- api_version: str
25
+ class OpenAIModelConfiguration(TypedDict, total=False):
26
+ """Model Configuration for OpenAI Model"""
16
27
 
17
-
18
- class OpenAIModelConfiguration(TypedDict):
28
+ type: Literal["openai"]
29
+ """The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration"""
19
30
  api_key: str
20
- base_url: str
21
- organization: str
31
+ "API key needed to make request to model"
32
+ model: str
33
+ """Name of model to be used in OpenAI request"""
34
+ base_url: NotRequired[str]
35
+ """(Optional) Base URL to be used in OpenAI request"""
36
+ organization: NotRequired[str]
37
+ """(Optional) OpenAI organization"""
22
38
 
23
39
 
24
40
  class AzureAIProject(TypedDict):
41
+ """Azure AI Project Information"""
42
+
25
43
  subscription_id: str
44
+ """Azure subscription id of the project"""
26
45
  resource_group_name: str
46
+ """Azure resource group name of the project"""
27
47
  project_name: str
48
+ """Azure project name"""
49
+
50
+
51
+ class EvaluatorConfig(TypedDict, total=False):
52
+ """Configuration for an evaluator"""
53
+
54
+ column_mapping: Dict[str, str]
55
+ """Dictionary mapping evaluator input name to column in data"""
@@ -2,4 +2,4 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- VERSION = "1.0.0b2"
5
+ VERSION = "1.0.0b3"
@@ -33,7 +33,8 @@ Answer must not be more than 5 words
33
33
  Answer must be picked from Text as is
34
34
  Question should be as descriptive as possible and must include as much context as possible from Text
35
35
  Output must always have the provided number of QnAs
36
- Output must be in JSON format
36
+ Output must be in JSON format.
37
+ Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
37
38
  Text:
38
39
  <|text_start|>
39
40
  On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
@@ -41,7 +41,7 @@ class Simulator:
41
41
  """
42
42
  self._validate_project_config(azure_ai_project)
43
43
  self.azure_ai_project = azure_ai_project
44
- self.azure_ai_project["api_version"] = "2024-02-15-preview"
44
+ self.azure_ai_project["api_version"] = "2024-06-01"
45
45
  self.credential = credential
46
46
 
47
47
  @staticmethod
@@ -129,7 +129,6 @@ class Simulator:
129
129
  max_conversation_turns *= 2 # account for both user and assistant turns
130
130
 
131
131
  prompty_model_config = self._build_prompty_model_config()
132
-
133
132
  if conversation_turns:
134
133
  return await self._simulate_with_predefined_turns(
135
134
  target=target,
@@ -234,8 +233,16 @@ class Simulator:
234
233
  target=target,
235
234
  progress_bar=progress_bar,
236
235
  )
237
-
238
- simulated_conversations.append(current_simulation.to_list())
236
+ simulated_conversations.append(
237
+ JsonLineChatProtocol(
238
+ {
239
+ "messages": current_simulation.to_list(),
240
+ "finish_reason": ["stop"],
241
+ "context": {},
242
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
243
+ }
244
+ )
245
+ )
239
246
 
240
247
  progress_bar.close()
241
248
  return simulated_conversations
@@ -280,7 +287,9 @@ class Simulator:
280
287
 
281
288
  while len(current_simulation) < max_conversation_turns:
282
289
  user_response_content = user_flow(
283
- task="Continue the conversation", conversation_history=current_simulation.to_list()
290
+ task="Continue the conversation",
291
+ conversation_history=current_simulation.to_list(),
292
+ **user_simulator_prompty_kwargs,
284
293
  )
285
294
  user_response = self._parse_prompty_response(response=user_response_content)
286
295
  user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
@@ -317,6 +326,7 @@ class Simulator:
317
326
  resource_name = "task_simulate.prompty"
318
327
  try:
319
328
  # Access the resource as a file path
329
+ # pylint: disable=deprecated-method
320
330
  with pkg_resources.path(package, resource_name) as prompty_path:
321
331
  return load_flow(source=str(prompty_path), model=prompty_model_config)
322
332
  except FileNotFoundError as e:
@@ -398,7 +408,6 @@ class Simulator:
398
408
  prompty_model_config=prompty_model_config,
399
409
  query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs,
400
410
  )
401
-
402
411
  try:
403
412
  query_responses = query_flow(text=text, num_queries=num_queries)
404
413
  if isinstance(query_responses, dict):
@@ -432,6 +441,7 @@ class Simulator:
432
441
  resource_name = "task_query_response.prompty"
433
442
  try:
434
443
  # Access the resource as a file path
444
+ # pylint: disable=deprecated-method
435
445
  with pkg_resources.path(package, resource_name) as prompty_path:
436
446
  return load_flow(source=str(prompty_path), model=prompty_model_config)
437
447
  except FileNotFoundError as e:
@@ -612,9 +622,10 @@ class Simulator:
612
622
  prompty_model_config=self._build_prompty_model_config(),
613
623
  user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
614
624
  )
615
-
616
625
  try:
617
- response_content = user_flow(task=task, conversation_history=conversation_history)
626
+ response_content = user_flow(
627
+ task=task, conversation_history=conversation_history, **user_simulator_prompty_kwargs
628
+ )
618
629
  user_response = self._parse_prompty_response(response=response_content)
619
630
  return user_response["content"]
620
631
  except Exception as e: