azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (144) hide show
  1. azure/ai/evaluation/__init__.py +10 -0
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +66 -0
  5. azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  6. azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  7. azure/ai/evaluation/_azure/_clients.py +4 -4
  8. azure/ai/evaluation/_azure/_envs.py +208 -0
  9. azure/ai/evaluation/_azure/_token_manager.py +12 -7
  10. azure/ai/evaluation/_common/__init__.py +7 -0
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +163 -0
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  13. azure/ai/evaluation/_common/onedp/_client.py +139 -0
  14. azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  15. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  17. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  18. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  20. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  26. azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  27. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  28. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  29. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  30. azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  31. azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  32. azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  33. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  35. azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  36. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  38. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  39. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  40. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  41. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  42. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  43. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  44. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  54. azure/ai/evaluation/_common/rai_service.py +165 -34
  55. azure/ai/evaluation/_common/raiclient/_version.py +1 -1
  56. azure/ai/evaluation/_common/utils.py +79 -1
  57. azure/ai/evaluation/_constants.py +16 -0
  58. azure/ai/evaluation/_converters/_ai_services.py +162 -118
  59. azure/ai/evaluation/_converters/_models.py +76 -6
  60. azure/ai/evaluation/_eval_mapping.py +73 -0
  61. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  62. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
  63. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
  64. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
  65. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  66. azure/ai/evaluation/_evaluate/_evaluate.py +325 -76
  67. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +553 -0
  68. azure/ai/evaluation/_evaluate/_utils.py +117 -4
  69. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
  70. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
  71. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
  72. azure/ai/evaluation/_evaluators/_common/_base_eval.py +12 -3
  73. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  74. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  75. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
  76. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
  77. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
  78. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
  79. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
  80. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  81. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +469 -0
  82. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
  83. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +11 -1
  84. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
  85. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -1
  86. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +16 -2
  87. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
  88. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
  89. azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
  90. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +11 -1
  91. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -2
  92. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
  93. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
  94. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
  95. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
  96. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +11 -1
  97. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +16 -2
  98. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +86 -12
  99. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
  100. azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
  101. azure/ai/evaluation/_exceptions.py +2 -0
  102. azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
  103. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  104. azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
  105. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  106. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
  107. azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
  108. azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  109. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  110. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
  111. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  112. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  113. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  114. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  115. azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  116. azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
  117. azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  118. azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  119. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +114 -22
  120. azure/ai/evaluation/_version.py +1 -1
  121. azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
  122. azure/ai/evaluation/red_team/_red_team.py +976 -546
  123. azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  124. azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
  125. azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  126. azure/ai/evaluation/simulator/_constants.py +1 -0
  127. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  128. azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  129. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  130. azure/ai/evaluation/simulator/_direct_attack_simulator.py +38 -25
  131. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  132. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +43 -28
  133. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  134. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +26 -18
  135. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  136. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  137. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +15 -10
  138. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  139. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +49 -3
  140. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +144 -86
  141. /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
  142. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
  143. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
  144. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0
@@ -6,12 +6,15 @@ import copy
6
6
  import json
7
7
  import time
8
8
  import uuid
9
- from typing import Any, Dict, List, Optional, cast
9
+ from typing import Any, Dict, List, Optional, cast, Union
10
10
 
11
11
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
12
12
  from azure.ai.evaluation._user_agent import USER_AGENT
13
13
  from azure.core.exceptions import HttpResponseError
14
14
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
15
+ from azure.ai.evaluation._common.onedp._client import AIProjectClient
16
+ from azure.ai.evaluation._common.onedp.models import SimulationDTO
17
+ from azure.ai.evaluation._common.constants import RAIService
15
18
 
16
19
  from .._model_tools._template_handler import TemplateParameters
17
20
  from .models import OpenAIChatCompletionsModel
@@ -40,14 +43,14 @@ class SimulationRequestDTO:
40
43
  headers: Dict[str, str],
41
44
  payload: Dict[str, Any],
42
45
  params: Dict[str, str],
43
- templatekey: str,
46
+ template_key: str,
44
47
  template_parameters: Optional[TemplateParameters],
45
48
  ):
46
49
  self.url = url
47
50
  self.headers = headers
48
51
  self.json = json.dumps(payload)
49
52
  self.params = params
50
- self.templatekey = templatekey
53
+ self.template_key = template_key
51
54
  self.templateParameters = template_parameters
52
55
 
53
56
  def to_dict(self) -> Dict:
@@ -111,7 +114,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
111
114
  async def get_conversation_completion(
112
115
  self,
113
116
  messages: List[Dict],
114
- session: AsyncHttpPipeline,
117
+ session: Union[AsyncHttpPipeline, AIProjectClient],
115
118
  role: str = "assistant", # pylint: disable=unused-argument
116
119
  **request_params,
117
120
  ) -> dict:
@@ -142,7 +145,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
142
145
 
143
146
  async def request_api(
144
147
  self,
145
- session: AsyncHttpPipeline,
148
+ session: Union[AsyncHttpPipeline, AIProjectClient],
146
149
  request_data: dict,
147
150
  ) -> dict:
148
151
  """
@@ -183,51 +186,72 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
183
186
  headers=headers,
184
187
  payload=request_data,
185
188
  params=params,
186
- templatekey=self.tkey,
189
+ template_key=self.tkey,
187
190
  template_parameters=self.tparam,
188
191
  )
189
192
 
190
193
  time_start = time.time()
191
194
  full_response = None
192
195
 
193
- response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
194
-
195
- if response.status_code != 202:
196
- raise HttpResponseError(
197
- message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
196
+ if(isinstance(session, AIProjectClient)):
197
+ sim_request_dto = SimulationDTO(
198
+ headers=headers,
199
+ params=params,
200
+ json=json.dumps(request_data),
201
+ template_key=self.tkey,
202
+ template_parameters=self.tparam,
198
203
  )
199
-
200
- response_data = response.json()
201
- self.result_url = cast(str, response_data["location"])
202
-
203
- retry_policy = AsyncRetryPolicy( # set up retry configuration
204
- retry_on_status_codes=[202], # on which statuses to retry
205
- retry_total=7,
206
- retry_backoff_factor=10.0,
207
- retry_backoff_max=180,
208
- retry_mode=RetryMode.Exponential,
209
- )
210
-
211
- # initial 15 seconds wait before attempting to fetch result
212
- # Need to wait both in this thread and in the async thread for some reason?
213
- # Someone not under a crunch and with better async understandings should dig into this more.
214
- await asyncio.sleep(15)
215
- time.sleep(15)
216
-
217
- async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
218
- token = await self.token_manager.get_token_async()
219
- proxy_headers = {
220
- "Authorization": f"Bearer {token}",
221
- "Content-Type": "application/json",
222
- "User-Agent": USER_AGENT,
223
- }
224
- response = await exp_retry_client.get( # pylint: disable=too-many-function-args,unexpected-keyword-arg
225
- self.result_url, headers=proxy_headers
204
+ response_data = session.red_teams.submit_simulation(sim_request_dto, headers=headers, params=params)
205
+ operation_id = response_data["location"].split("/")[-1]
206
+
207
+ request_count = 0
208
+ flag = True
209
+ while flag:
210
+ response = session.evaluations.operation_results(operation_id, headers=headers)
211
+ if response.status_code == 200:
212
+ response_data = cast(List[Dict], response.json())
213
+ flag = False
214
+ else:
215
+ request_count += 1
216
+ sleep_time = RAIService.SLEEP_TIME**request_count
217
+ await asyncio.sleep(sleep_time)
218
+ else:
219
+ response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
220
+ # response.raise_for_status()
221
+ if response.status_code != 202:
222
+ raise HttpResponseError(
223
+ message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
224
+ )
225
+ response_data = response.json()
226
+
227
+ self.result_url = cast(str, response_data["location"])
228
+ retry_policy = AsyncRetryPolicy( # set up retry configuration
229
+ retry_on_status_codes=[202], # on which statuses to retry
230
+ retry_total=7,
231
+ retry_backoff_factor=10.0,
232
+ retry_backoff_max=180,
233
+ retry_mode=RetryMode.Exponential,
226
234
  )
227
235
 
228
- response.raise_for_status()
229
-
230
- response_data = response.json()
236
+ # initial 15 seconds wait before attempting to fetch result
237
+ # Need to wait both in this thread and in the async thread for some reason?
238
+ # Someone not under a crunch and with better async understandings should dig into this more.
239
+ await asyncio.sleep(15)
240
+ time.sleep(15)
241
+
242
+ async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
243
+ token = await self.token_manager.get_token_async()
244
+ proxy_headers = {
245
+ "Authorization": f"Bearer {token}",
246
+ "Content-Type": "application/json",
247
+ "User-Agent": USER_AGENT,
248
+ }
249
+ response = await exp_retry_client.get( # pylint: disable=too-many-function-args,unexpected-keyword-arg
250
+ self.result_url, headers=proxy_headers
251
+ )
252
+ response.raise_for_status()
253
+ response_data = response.json()
254
+
231
255
  self.logger.info("Response: %s", response_data)
232
256
 
233
257
  # Copy the full response and return it to be saved in jsonl.
@@ -2,11 +2,12 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- from typing import Dict, List, Optional, TypedDict, cast
6
-
5
+ from typing import Dict, List, Optional, TypedDict, cast, Union
6
+ from ast import literal_eval
7
7
  from typing_extensions import NotRequired
8
8
 
9
9
  from azure.ai.evaluation._model_configurations import AzureAIProject
10
+ from azure.ai.evaluation._common.onedp._client import AIProjectClient
10
11
 
11
12
  from ._rai_client import RAIClient
12
13
 
@@ -145,15 +146,16 @@ class AdversarialTemplate:
145
146
 
146
147
  class AdversarialTemplateHandler:
147
148
  """
148
- Adversarial template handler constructor.
149
+ Initialize the AdversarialTemplateHandler.
149
150
 
150
- :param azure_ai_project: The Azure AI project.
151
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
152
- :param rai_client: The RAI client.
153
- :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
151
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
152
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
153
+ :type azure_ai_project: Union[str, AzureAIProject]
154
+ :param rai_client: The RAI client or AI Project client used for fetching parameters.
155
+ :type rai_client: Union[~azure.ai.evaluation.simulator._model_tools.RAIClient, ~azure.ai.evaluation._common.onedp._client.AIProjectClient]
154
156
  """
155
157
 
156
- def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
158
+ def __init__(self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]) -> None:
157
159
  self.azure_ai_project = azure_ai_project
158
160
  self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None
159
161
  self.rai_client = rai_client
@@ -163,8 +165,11 @@ class AdversarialTemplateHandler:
163
165
  categorized_parameters: Dict[str, _CategorizedParameter] = {}
164
166
  util = ContentHarmTemplatesUtils
165
167
 
166
- parameters = await self.rai_client.get_contentharm_parameters()
167
-
168
+ if isinstance(self.rai_client, RAIClient):
169
+ parameters = await self.rai_client.get_contentharm_parameters()
170
+ elif isinstance(self.rai_client, AIProjectClient):
171
+ parameters = literal_eval(self.rai_client.red_teams.get_template_parameters())
172
+
168
173
  for k in parameters.keys():
169
174
  template_key = util.get_template_key(k)
170
175
  categorized_parameters[template_key] = {
@@ -12,6 +12,8 @@ from abc import ABC, abstractmethod
12
12
  from collections import deque
13
13
  from typing import Deque, Dict, List, Optional, Union
14
14
  from urllib.parse import urlparse
15
+ from azure.ai.evaluation._common.onedp._client import AIProjectClient
16
+ from ._rai_client import RAIClient
15
17
 
16
18
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
19
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
@@ -78,7 +80,7 @@ class LLMBase(ABC):
78
80
  async def get_completion(
79
81
  self,
80
82
  prompt: str,
81
- session: AsyncHttpPipeline,
83
+ session: Union[AsyncHttpPipeline, AIProjectClient],
82
84
  **request_params,
83
85
  ) -> dict:
84
86
  """
@@ -100,7 +102,7 @@ class LLMBase(ABC):
100
102
  async def get_all_completions(
101
103
  self,
102
104
  prompts: List[str],
103
- session: AsyncHttpPipeline,
105
+ session: Union[AsyncHttpPipeline, AIProjectClient],
104
106
  api_call_max_parallel_count: int,
105
107
  api_call_delay_seconds: float,
106
108
  request_error_rate_threshold: float,
@@ -120,7 +122,7 @@ class LLMBase(ABC):
120
122
  async def get_conversation_completion(
121
123
  self,
122
124
  messages: List[dict],
123
- session: AsyncHttpPipeline,
125
+ session: Union[AsyncHttpPipeline, AIProjectClient],
124
126
  role: str,
125
127
  **request_params,
126
128
  ) -> dict:
@@ -274,7 +276,7 @@ class OpenAICompletionsModel(LLMBase):
274
276
  async def get_conversation_completion(
275
277
  self,
276
278
  messages: List[dict],
277
- session: AsyncHttpPipeline,
279
+ session: Union[AsyncHttpPipeline, AIProjectClient],
278
280
  role: str = "assistant",
279
281
  **request_params,
280
282
  ) -> dict:
@@ -304,7 +306,7 @@ class OpenAICompletionsModel(LLMBase):
304
306
  async def get_all_completions( # type: ignore[override]
305
307
  self,
306
308
  prompts: List[Dict[str, str]],
307
- session: AsyncHttpPipeline,
309
+ session: Union[AsyncHttpPipeline, AIProjectClient],
308
310
  api_call_max_parallel_count: int = 1,
309
311
  api_call_delay_seconds: float = 0.1,
310
312
  request_error_rate_threshold: float = 0.5,
@@ -372,7 +374,7 @@ class OpenAICompletionsModel(LLMBase):
372
374
  self,
373
375
  request_datas: List[dict],
374
376
  output_collector: List,
375
- session: AsyncHttpPipeline,
377
+ session: Union[AsyncHttpPipeline, AIProjectClient],
376
378
  api_call_delay_seconds: float = 0.1,
377
379
  request_error_rate_threshold: float = 0.5,
378
380
  ) -> None:
@@ -433,7 +435,7 @@ class OpenAICompletionsModel(LLMBase):
433
435
 
434
436
  async def request_api(
435
437
  self,
436
- session: AsyncHttpPipeline,
438
+ session: Union[AsyncHttpPipeline, AIProjectClient],
437
439
  request_data: dict,
438
440
  ) -> dict:
439
441
  """
@@ -476,11 +478,12 @@ class OpenAICompletionsModel(LLMBase):
476
478
  time_start = time.time()
477
479
  full_response = None
478
480
 
479
- response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
480
-
481
- response.raise_for_status()
482
-
483
- response_data = response.json()
481
+ if(isinstance(session, AIProjectClient)):
482
+ response_data = session.red_teams.submit_simulation(request_data, headers, params)
483
+ else:
484
+ response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
485
+ response.raise_for_status()
486
+ response_data = response.json()
484
487
 
485
488
  self.logger.info(f"Response: {response_data}")
486
489
 
@@ -533,7 +536,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
533
536
  async def get_conversation_completion(
534
537
  self,
535
538
  messages: List[dict],
536
- session: AsyncHttpPipeline,
539
+ session: Union[AsyncHttpPipeline, AIProjectClient],
537
540
  role: str = "assistant",
538
541
  **request_params,
539
542
  ) -> dict:
@@ -544,7 +547,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
544
547
  ----------
545
548
  messages: List of messages to query the model with.
546
549
  Expected format: [{"role": "user", "content": "Hello!"}, ...]
547
- session: AsyncHttpPipeline object to query the model with.
550
+ session: Union[AsyncHttpPipeline, AIProjectClient] object to query the model with.
548
551
  role: Not used for this model, since it is a chat model.
549
552
  request_params: Additional parameters to pass to the model.
550
553
  """
@@ -560,7 +563,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
560
563
  async def get_completion(
561
564
  self,
562
565
  prompt: str,
563
- session: AsyncHttpPipeline,
566
+ session: Union[AsyncHttpPipeline, AIProjectClient],
564
567
  **request_params,
565
568
  ) -> dict:
566
569
  """
@@ -569,7 +572,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
569
572
  Parameters
570
573
  ----------
571
574
  prompt: Prompt str to query model with.
572
- session: AsyncHttpPipeline object to use for the request.
575
+ session: Union[AsyncHttpPipeline, AIProjectClient] object to use for the request.
573
576
  **request_params: Additional parameters to pass to the request.
574
577
  """
575
578
  messages = [{"role": "system", "content": prompt}]
@@ -583,7 +586,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
583
586
  async def get_all_completions(
584
587
  self,
585
588
  prompts: List[str], # type: ignore[override]
586
- session: AsyncHttpPipeline,
589
+ session: Union[AsyncHttpPipeline, AIProjectClient],
587
590
  api_call_max_parallel_count: int = 1,
588
591
  api_call_delay_seconds: float = 0.1,
589
592
  request_error_rate_threshold: float = 0.5,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.5.0
3
+ Version: 1.7.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -30,9 +30,11 @@ Requires-Dist: nltk>=3.9.1
30
30
  Requires-Dist: azure-storage-blob>=12.10.0
31
31
  Requires-Dist: httpx>=0.25.1
32
32
  Requires-Dist: pandas<3.0.0,>=2.1.2
33
- Requires-Dist: openai>=1.40.0
33
+ Requires-Dist: openai>=1.78.0
34
34
  Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
35
35
  Requires-Dist: msrest>=0.6.21
36
+ Requires-Dist: Jinja2>=3.1.6
37
+ Requires-Dist: aiohttp>=3.0
36
38
  Provides-Extra: redteam
37
39
  Requires-Dist: pyrit==0.8.1; extra == "redteam"
38
40
 
@@ -114,13 +116,23 @@ result = relevance_evaluator(
114
116
  response="The capital of Japan is Tokyo."
115
117
  )
116
118
 
117
- # AI assisted safety evaluator
119
+ # There are two ways to provide Azure AI Project.
120
+ # Option #1 : Using Azure AI Project Details
118
121
  azure_ai_project = {
119
122
  "subscription_id": "<subscription_id>",
120
123
  "resource_group_name": "<resource_group_name>",
121
124
  "project_name": "<project_name>",
122
125
  }
123
126
 
127
+ violence_evaluator = ViolenceEvaluator(azure_ai_project)
128
+ result = violence_evaluator(
129
+ query="What is the capital of France?",
130
+ response="Paris."
131
+ )
132
+
133
+ # Option #2 : Using Azure AI Project Url
134
+ azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
135
+
124
136
  violence_evaluator = ViolenceEvaluator(azure_ai_project)
125
137
  result = violence_evaluator(
126
138
  query="What is the capital of France?",
@@ -271,11 +283,18 @@ with open("simulator_output.jsonl", "w") as f:
271
283
  ```python
272
284
  from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
273
285
  from azure.identity import DefaultAzureCredential
286
+
287
+ # There are two ways to provide Azure AI Project.
288
+ # Option #1 : Using Azure AI Project
274
289
  azure_ai_project = {
275
290
  "subscription_id": <subscription_id>,
276
291
  "resource_group_name": <resource_group_name>,
277
292
  "project_name": <project_name>
278
293
  }
294
+
295
+ # Option #2 : Using Azure AI Project Url
296
+ azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
297
+
279
298
  scenario = AdversarialScenario.ADVERSARIAL_QA
280
299
  simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
281
300
 
@@ -381,6 +400,33 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
381
400
 
382
401
  # Release History
383
402
 
403
+ ## 1.7.0 (2025-05-12)
404
+
405
+ ### Bugs Fixed
406
+ - azure-ai-evaluation failed with module not found [#40992](https://github.com/Azure/azure-sdk-for-python/issues/40992)
407
+
408
+ ## 1.6.0 (2025-05-07)
409
+
410
+ ### Features Added
411
+ - New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
412
+ - Added support for Azure OpenAI evaluation via 4 new 'grader' classes, which serve as wrappers around Azure OpenAI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
413
+ - AzureOpenAIGrader (general class for experienced users)
414
+ - AzureOpenAILabelGrader
415
+ - AzureOpenAIStringCheckGrader
416
+ - AzureOpenAITextSimilarityGrader
417
+
418
+ ### Breaking Changes
419
+ - In the experimental RedTeam's scan method, the `data_only` param has been replaced with `skip_evals`. If you do not want data to be uploaded, use the `skip_upload` flag.
420
+
421
+ ### Bugs Fixed
422
+ - Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
423
+ ```
424
+ "query1": "some query", "response": "some response"
425
+ ```
426
+ threw an error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
427
+ Now, users may import data containing fields with numeric characters.
428
+
429
+
384
430
  ## 1.5.0 (2025-04-04)
385
431
 
386
432
  ### Features Added