azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (150) hide show
  1. azure/ai/evaluation/__init__.py +9 -16
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +66 -0
  5. azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  6. azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  7. azure/ai/evaluation/_azure/_clients.py +4 -4
  8. azure/ai/evaluation/_azure/_envs.py +208 -0
  9. azure/ai/evaluation/_azure/_token_manager.py +12 -7
  10. azure/ai/evaluation/_common/__init__.py +5 -0
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  13. azure/ai/evaluation/_common/onedp/_client.py +139 -0
  14. azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  15. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  17. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  18. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  20. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  26. azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  27. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  28. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  29. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  30. azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  31. azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  32. azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  33. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  35. azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  36. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  38. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  39. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  40. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  41. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  42. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  43. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  44. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  54. azure/ai/evaluation/_common/rai_service.py +159 -29
  55. azure/ai/evaluation/_common/raiclient/_version.py +1 -1
  56. azure/ai/evaluation/_common/utils.py +80 -2
  57. azure/ai/evaluation/_constants.py +16 -0
  58. azure/ai/evaluation/_converters/__init__.py +1 -1
  59. azure/ai/evaluation/_converters/_ai_services.py +4 -4
  60. azure/ai/evaluation/_eval_mapping.py +71 -0
  61. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  62. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  63. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +17 -4
  64. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  65. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  66. azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
  67. azure/ai/evaluation/_evaluate/_evaluate.py +372 -105
  68. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
  69. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
  70. azure/ai/evaluation/_evaluate/_utils.py +120 -7
  71. azure/ai/evaluation/_evaluators/_common/_base_eval.py +9 -4
  72. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
  73. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  74. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  75. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  76. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
  77. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
  78. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +2 -2
  79. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
  80. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
  81. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +8 -2
  82. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
  83. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
  84. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
  85. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
  86. azure/ai/evaluation/_exceptions.py +2 -0
  87. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  88. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  89. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  90. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  91. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  92. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  93. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  94. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  95. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  96. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  97. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  98. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  99. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  100. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
  101. azure/ai/evaluation/_legacy/_batch_engine/_result.py +7 -1
  102. azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  103. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  104. azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
  105. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
  106. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  107. azure/ai/evaluation/{_red_team/_utils → _legacy/_common}/__init__.py +1 -1
  108. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  109. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  110. azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  111. azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
  112. azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  113. azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  114. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
  115. azure/ai/evaluation/_version.py +1 -1
  116. azure/ai/evaluation/red_team/__init__.py +19 -0
  117. azure/ai/evaluation/{_red_team → red_team}/_attack_objective_generator.py +3 -0
  118. azure/ai/evaluation/{_red_team → red_team}/_attack_strategy.py +4 -1
  119. azure/ai/evaluation/{_red_team → red_team}/_red_team.py +885 -481
  120. azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  121. azure/ai/evaluation/{_red_team → red_team}/_utils/constants.py +2 -1
  122. azure/ai/evaluation/{_red_team → red_team}/_utils/formatting_utils.py +23 -22
  123. azure/ai/evaluation/{_red_team → red_team}/_utils/logging_utils.py +1 -1
  124. azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  125. azure/ai/evaluation/{_red_team → red_team}/_utils/strategy_utils.py +9 -5
  126. azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  127. azure/ai/evaluation/simulator/_constants.py +1 -0
  128. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  129. azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  130. azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
  131. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  132. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
  133. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  134. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
  135. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  136. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  137. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
  138. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  139. azure/ai/evaluation/simulator/_simulator.py +1 -1
  140. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA +36 -2
  141. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/RECORD +148 -80
  142. azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
  143. azure/ai/evaluation/simulator/_tracing.py +0 -89
  144. /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
  145. /azure/ai/evaluation/{_red_team → red_team}/_callback_chat_target.py +0 -0
  146. /azure/ai/evaluation/{_red_team → red_team}/_default_converter.py +0 -0
  147. /azure/ai/evaluation/{_red_team → red_team/_utils}/__init__.py +0 -0
  148. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/NOTICE.txt +0 -0
  149. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/WHEEL +0 -0
  150. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/top_level.txt +0 -0
@@ -6,12 +6,15 @@ import copy
6
6
  import json
7
7
  import time
8
8
  import uuid
9
- from typing import Any, Dict, List, Optional, cast
9
+ from typing import Any, Dict, List, Optional, cast, Union
10
10
 
11
11
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
12
12
  from azure.ai.evaluation._user_agent import USER_AGENT
13
13
  from azure.core.exceptions import HttpResponseError
14
14
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
15
+ from azure.ai.evaluation._common.onedp._client import AIProjectClient
16
+ from azure.ai.evaluation._common.onedp.models import SimulationDTO
17
+ from azure.ai.evaluation._common.constants import RAIService
15
18
 
16
19
  from .._model_tools._template_handler import TemplateParameters
17
20
  from .models import OpenAIChatCompletionsModel
@@ -40,14 +43,14 @@ class SimulationRequestDTO:
40
43
  headers: Dict[str, str],
41
44
  payload: Dict[str, Any],
42
45
  params: Dict[str, str],
43
- templatekey: str,
46
+ template_key: str,
44
47
  template_parameters: Optional[TemplateParameters],
45
48
  ):
46
49
  self.url = url
47
50
  self.headers = headers
48
51
  self.json = json.dumps(payload)
49
52
  self.params = params
50
- self.templatekey = templatekey
53
+ self.template_key = template_key
51
54
  self.templateParameters = template_parameters
52
55
 
53
56
  def to_dict(self) -> Dict:
@@ -111,7 +114,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
111
114
  async def get_conversation_completion(
112
115
  self,
113
116
  messages: List[Dict],
114
- session: AsyncHttpPipeline,
117
+ session: Union[AsyncHttpPipeline, AIProjectClient],
115
118
  role: str = "assistant", # pylint: disable=unused-argument
116
119
  **request_params,
117
120
  ) -> dict:
@@ -142,7 +145,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
142
145
 
143
146
  async def request_api(
144
147
  self,
145
- session: AsyncHttpPipeline,
148
+ session: Union[AsyncHttpPipeline, AIProjectClient],
146
149
  request_data: dict,
147
150
  ) -> dict:
148
151
  """
@@ -183,51 +186,72 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
183
186
  headers=headers,
184
187
  payload=request_data,
185
188
  params=params,
186
- templatekey=self.tkey,
189
+ template_key=self.tkey,
187
190
  template_parameters=self.tparam,
188
191
  )
189
192
 
190
193
  time_start = time.time()
191
194
  full_response = None
192
195
 
193
- response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
194
-
195
- if response.status_code != 202:
196
- raise HttpResponseError(
197
- message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
196
+ if(isinstance(session, AIProjectClient)):
197
+ sim_request_dto = SimulationDTO(
198
+ headers=headers,
199
+ params=params,
200
+ json=json.dumps(request_data),
201
+ template_key=self.tkey,
202
+ template_parameters=self.tparam,
198
203
  )
199
-
200
- response_data = response.json()
201
- self.result_url = cast(str, response_data["location"])
202
-
203
- retry_policy = AsyncRetryPolicy( # set up retry configuration
204
- retry_on_status_codes=[202], # on which statuses to retry
205
- retry_total=7,
206
- retry_backoff_factor=10.0,
207
- retry_backoff_max=180,
208
- retry_mode=RetryMode.Exponential,
209
- )
210
-
211
- # initial 15 seconds wait before attempting to fetch result
212
- # Need to wait both in this thread and in the async thread for some reason?
213
- # Someone not under a crunch and with better async understandings should dig into this more.
214
- await asyncio.sleep(15)
215
- time.sleep(15)
216
-
217
- async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
218
- token = await self.token_manager.get_token_async()
219
- proxy_headers = {
220
- "Authorization": f"Bearer {token}",
221
- "Content-Type": "application/json",
222
- "User-Agent": USER_AGENT,
223
- }
224
- response = await exp_retry_client.get( # pylint: disable=too-many-function-args,unexpected-keyword-arg
225
- self.result_url, headers=proxy_headers
204
+ response_data = session.red_teams.submit_simulation(sim_request_dto, headers=headers, params=params)
205
+ operation_id = response_data["location"].split("/")[-1]
206
+
207
+ request_count = 0
208
+ flag = True
209
+ while flag:
210
+ response = session.evaluations.operation_results(operation_id, headers=headers)
211
+ if response.status_code == 200:
212
+ response_data = cast(List[Dict], response.json())
213
+ flag = False
214
+ else:
215
+ request_count += 1
216
+ sleep_time = RAIService.SLEEP_TIME**request_count
217
+ await asyncio.sleep(sleep_time)
218
+ else:
219
+ response = await session.post(url=self.endpoint_url, headers=proxy_headers, json=sim_request_dto.to_dict())
220
+ # response.raise_for_status()
221
+ if response.status_code != 202:
222
+ raise HttpResponseError(
223
+ message=f"Received unexpected HTTP status: {response.status_code} {response.text()}", response=response
224
+ )
225
+ response_data = response.json()
226
+
227
+ self.result_url = cast(str, response_data["location"])
228
+ retry_policy = AsyncRetryPolicy( # set up retry configuration
229
+ retry_on_status_codes=[202], # on which statuses to retry
230
+ retry_total=7,
231
+ retry_backoff_factor=10.0,
232
+ retry_backoff_max=180,
233
+ retry_mode=RetryMode.Exponential,
226
234
  )
227
235
 
228
- response.raise_for_status()
229
-
230
- response_data = response.json()
236
+ # initial 15 seconds wait before attempting to fetch result
237
+ # Need to wait both in this thread and in the async thread for some reason?
238
+ # Someone not under a crunch and with better async understandings should dig into this more.
239
+ await asyncio.sleep(15)
240
+ time.sleep(15)
241
+
242
+ async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
243
+ token = await self.token_manager.get_token_async()
244
+ proxy_headers = {
245
+ "Authorization": f"Bearer {token}",
246
+ "Content-Type": "application/json",
247
+ "User-Agent": USER_AGENT,
248
+ }
249
+ response = await exp_retry_client.get( # pylint: disable=too-many-function-args,unexpected-keyword-arg
250
+ self.result_url, headers=proxy_headers
251
+ )
252
+ response.raise_for_status()
253
+ response_data = response.json()
254
+
231
255
  self.logger.info("Response: %s", response_data)
232
256
 
233
257
  # Copy the full response and return it to be saved in jsonl.
@@ -2,11 +2,12 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- from typing import Dict, List, Optional, TypedDict, cast
6
-
5
+ from typing import Dict, List, Optional, TypedDict, cast, Union
6
+ from ast import literal_eval
7
7
  from typing_extensions import NotRequired
8
8
 
9
9
  from azure.ai.evaluation._model_configurations import AzureAIProject
10
+ from azure.ai.evaluation._common.onedp._client import AIProjectClient
10
11
 
11
12
  from ._rai_client import RAIClient
12
13
 
@@ -153,7 +154,7 @@ class AdversarialTemplateHandler:
153
154
  :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
154
155
  """
155
156
 
156
- def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
157
+ def __init__(self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]) -> None:
157
158
  self.azure_ai_project = azure_ai_project
158
159
  self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None
159
160
  self.rai_client = rai_client
@@ -163,8 +164,11 @@ class AdversarialTemplateHandler:
163
164
  categorized_parameters: Dict[str, _CategorizedParameter] = {}
164
165
  util = ContentHarmTemplatesUtils
165
166
 
166
- parameters = await self.rai_client.get_contentharm_parameters()
167
-
167
+ if isinstance(self.rai_client, RAIClient):
168
+ parameters = await self.rai_client.get_contentharm_parameters()
169
+ elif isinstance(self.rai_client, AIProjectClient):
170
+ parameters = literal_eval(self.rai_client.red_teams.get_template_parameters())
171
+
168
172
  for k in parameters.keys():
169
173
  template_key = util.get_template_key(k)
170
174
  categorized_parameters[template_key] = {
@@ -12,6 +12,8 @@ from abc import ABC, abstractmethod
12
12
  from collections import deque
13
13
  from typing import Deque, Dict, List, Optional, Union
14
14
  from urllib.parse import urlparse
15
+ from azure.ai.evaluation._common.onedp._client import AIProjectClient
16
+ from ._rai_client import RAIClient
15
17
 
16
18
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
17
19
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
@@ -78,7 +80,7 @@ class LLMBase(ABC):
78
80
  async def get_completion(
79
81
  self,
80
82
  prompt: str,
81
- session: AsyncHttpPipeline,
83
+ session: Union[AsyncHttpPipeline, AIProjectClient],
82
84
  **request_params,
83
85
  ) -> dict:
84
86
  """
@@ -100,7 +102,7 @@ class LLMBase(ABC):
100
102
  async def get_all_completions(
101
103
  self,
102
104
  prompts: List[str],
103
- session: AsyncHttpPipeline,
105
+ session: Union[AsyncHttpPipeline, AIProjectClient],
104
106
  api_call_max_parallel_count: int,
105
107
  api_call_delay_seconds: float,
106
108
  request_error_rate_threshold: float,
@@ -120,7 +122,7 @@ class LLMBase(ABC):
120
122
  async def get_conversation_completion(
121
123
  self,
122
124
  messages: List[dict],
123
- session: AsyncHttpPipeline,
125
+ session: Union[AsyncHttpPipeline, AIProjectClient],
124
126
  role: str,
125
127
  **request_params,
126
128
  ) -> dict:
@@ -274,7 +276,7 @@ class OpenAICompletionsModel(LLMBase):
274
276
  async def get_conversation_completion(
275
277
  self,
276
278
  messages: List[dict],
277
- session: AsyncHttpPipeline,
279
+ session: Union[AsyncHttpPipeline, AIProjectClient],
278
280
  role: str = "assistant",
279
281
  **request_params,
280
282
  ) -> dict:
@@ -304,7 +306,7 @@ class OpenAICompletionsModel(LLMBase):
304
306
  async def get_all_completions( # type: ignore[override]
305
307
  self,
306
308
  prompts: List[Dict[str, str]],
307
- session: AsyncHttpPipeline,
309
+ session: Union[AsyncHttpPipeline, AIProjectClient],
308
310
  api_call_max_parallel_count: int = 1,
309
311
  api_call_delay_seconds: float = 0.1,
310
312
  request_error_rate_threshold: float = 0.5,
@@ -372,7 +374,7 @@ class OpenAICompletionsModel(LLMBase):
372
374
  self,
373
375
  request_datas: List[dict],
374
376
  output_collector: List,
375
- session: AsyncHttpPipeline,
377
+ session: Union[AsyncHttpPipeline, AIProjectClient],
376
378
  api_call_delay_seconds: float = 0.1,
377
379
  request_error_rate_threshold: float = 0.5,
378
380
  ) -> None:
@@ -433,7 +435,7 @@ class OpenAICompletionsModel(LLMBase):
433
435
 
434
436
  async def request_api(
435
437
  self,
436
- session: AsyncHttpPipeline,
438
+ session: Union[AsyncHttpPipeline, AIProjectClient],
437
439
  request_data: dict,
438
440
  ) -> dict:
439
441
  """
@@ -476,11 +478,12 @@ class OpenAICompletionsModel(LLMBase):
476
478
  time_start = time.time()
477
479
  full_response = None
478
480
 
479
- response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
480
-
481
- response.raise_for_status()
482
-
483
- response_data = response.json()
481
+ if(isinstance(session, AIProjectClient)):
482
+ response_data = session.red_teams.submit_simulation(request_data, headers, params)
483
+ else:
484
+ response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
485
+ response.raise_for_status()
486
+ response_data = response.json()
484
487
 
485
488
  self.logger.info(f"Response: {response_data}")
486
489
 
@@ -533,7 +536,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
533
536
  async def get_conversation_completion(
534
537
  self,
535
538
  messages: List[dict],
536
- session: AsyncHttpPipeline,
539
+ session: Union[AsyncHttpPipeline, AIProjectClient],
537
540
  role: str = "assistant",
538
541
  **request_params,
539
542
  ) -> dict:
@@ -544,7 +547,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
544
547
  ----------
545
548
  messages: List of messages to query the model with.
546
549
  Expected format: [{"role": "user", "content": "Hello!"}, ...]
547
- session: AsyncHttpPipeline object to query the model with.
550
+ session: Union[AsyncHttpPipeline, AIProjectClient] object to query the model with.
548
551
  role: Not used for this model, since it is a chat model.
549
552
  request_params: Additional parameters to pass to the model.
550
553
  """
@@ -560,7 +563,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
560
563
  async def get_completion(
561
564
  self,
562
565
  prompt: str,
563
- session: AsyncHttpPipeline,
566
+ session: Union[AsyncHttpPipeline, AIProjectClient],
564
567
  **request_params,
565
568
  ) -> dict:
566
569
  """
@@ -569,7 +572,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
569
572
  Parameters
570
573
  ----------
571
574
  prompt: Prompt str to query model with.
572
- session: AsyncHttpPipeline object to use for the request.
575
+ session: Union[AsyncHttpPipeline, AIProjectClient] object to use for the request.
573
576
  **request_params: Additional parameters to pass to the request.
574
577
  """
575
578
  messages = [{"role": "system", "content": prompt}]
@@ -583,7 +586,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
583
586
  async def get_all_completions(
584
587
  self,
585
588
  prompts: List[str], # type: ignore[override]
586
- session: AsyncHttpPipeline,
589
+ session: Union[AsyncHttpPipeline, AIProjectClient],
587
590
  api_call_max_parallel_count: int = 1,
588
591
  api_call_delay_seconds: float = 0.1,
589
592
  request_error_rate_threshold: float = 0.5,
@@ -11,7 +11,7 @@ import re
11
11
  import warnings
12
12
  from typing import Any, Callable, Dict, List, Optional, Union, Tuple
13
13
 
14
- from promptflow.core import AsyncPrompty
14
+ from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
15
15
  from tqdm import tqdm
16
16
 
17
17
  from azure.ai.evaluation._common._experimental import experimental
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.4.0
3
+ Version: 1.6.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -28,8 +28,14 @@ Requires-Dist: azure-identity>=1.16.0
28
28
  Requires-Dist: azure-core>=1.30.2
29
29
  Requires-Dist: nltk>=3.9.1
30
30
  Requires-Dist: azure-storage-blob>=12.10.0
31
+ Requires-Dist: httpx>=0.25.1
32
+ Requires-Dist: pandas<3.0.0,>=2.1.2
33
+ Requires-Dist: openai>=1.73.0
34
+ Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
35
+ Requires-Dist: msrest>=0.6.21
36
+ Requires-Dist: Jinja2>=3.1.6
31
37
  Provides-Extra: redteam
32
- Requires-Dist: pyrit>=0.8.0; extra == "redteam"
38
+ Requires-Dist: pyrit==0.8.1; extra == "redteam"
33
39
 
34
40
  # Azure AI Evaluation client library for Python
35
41
 
@@ -376,6 +382,34 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
376
382
 
377
383
  # Release History
378
384
 
385
+ ## 1.6.0 (2025-05-07)
386
+
387
+ ### Features Added
388
+ - New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
389
+ - Added support for Azure OpenAI evaluation via four new 'grader' classes, which serve as wrappers around Azure OpenAI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
390
+ - AzureOpenAIGrader (general class for experienced users)
391
+ - AzureOpenAILabelGrader
392
+ - AzureOpenAIStringCheckGrader
393
+ - AzureOpenAITextSimilarityGrader
394
+
395
+ ### Breaking Changes
396
+ - In the experimental `RedTeam` class's `scan` method, the `data_only` parameter has been replaced with `skip_evals`. If you do not want data to be uploaded, use the `skip_upload` flag.
397
+
398
+ ### Bugs Fixed
399
+ - Fixed an error in `evaluate` where data field names could not contain numeric characters. Previously, a data file with the schema:
400
+ ```
401
+ "query1": "some query", "response": "some response"
402
+ ```
403
+ threw an error when used with an `evaluator_config` of `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
404
+ Now, users may import data containing fields with numeric characters.
405
+
406
+
407
+ ## 1.5.0 (2025-04-04)
408
+
409
+ ### Features Added
410
+
411
+ - New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
412
+
379
413
  ## 1.4.0 (2025-03-27)
380
414
 
381
415
  ### Features Added