azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (123) hide show
  1. azure/ai/evaluation/__init__.py +9 -0
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +66 -0
  5. azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  6. azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  7. azure/ai/evaluation/_azure/_clients.py +4 -4
  8. azure/ai/evaluation/_azure/_envs.py +208 -0
  9. azure/ai/evaluation/_azure/_token_manager.py +12 -7
  10. azure/ai/evaluation/_common/__init__.py +5 -0
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  13. azure/ai/evaluation/_common/onedp/_client.py +139 -0
  14. azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  15. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  17. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  18. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  20. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  26. azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  27. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  28. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  29. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  30. azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  31. azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  32. azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  33. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  35. azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  36. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  38. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  39. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  40. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  41. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  42. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  43. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  44. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  54. azure/ai/evaluation/_common/rai_service.py +158 -28
  55. azure/ai/evaluation/_common/raiclient/_version.py +1 -1
  56. azure/ai/evaluation/_common/utils.py +79 -1
  57. azure/ai/evaluation/_constants.py +16 -0
  58. azure/ai/evaluation/_eval_mapping.py +71 -0
  59. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  60. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
  61. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
  62. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
  63. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  64. azure/ai/evaluation/_evaluate/_evaluate.py +325 -74
  65. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
  66. azure/ai/evaluation/_evaluate/_utils.py +117 -4
  67. azure/ai/evaluation/_evaluators/_common/_base_eval.py +8 -3
  68. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  69. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  70. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  71. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
  72. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
  73. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
  74. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
  75. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
  76. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +7 -2
  77. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
  78. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
  79. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
  80. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
  81. azure/ai/evaluation/_exceptions.py +2 -0
  82. azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
  83. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  84. azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
  85. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  86. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
  87. azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
  88. azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  89. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  90. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
  91. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  92. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  93. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  94. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  95. azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  96. azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
  97. azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  98. azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  99. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
  100. azure/ai/evaluation/_version.py +1 -1
  101. azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
  102. azure/ai/evaluation/red_team/_red_team.py +825 -450
  103. azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  104. azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
  105. azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  106. azure/ai/evaluation/simulator/_constants.py +1 -0
  107. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  108. azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  109. azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
  110. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  111. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
  112. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  113. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
  114. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  115. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  116. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
  117. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  118. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA +25 -2
  119. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/RECORD +123 -65
  120. /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
  121. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/NOTICE.txt +0 -0
  122. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/WHEEL +0 -0
  123. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/top_level.txt +0 -0
@@ -2,20 +2,27 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
+ import asyncio
5
6
  import re
7
+
8
+ from logging import Logger
6
9
  from os import PathLike
7
10
  from pathlib import Path
8
- from typing import Any, AsyncGenerator, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union, cast
11
+ from typing import Any, AsyncGenerator, Awaitable, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union, cast
9
12
 
10
- from openai import AsyncAzureOpenAI, AsyncOpenAI, NotGiven
13
+ from openai import AsyncAzureOpenAI, AsyncOpenAI, NotGiven, OpenAIError
14
+ from openai.lib.azure import AsyncAzureADTokenProvider
15
+ from azure.core.credentials import TokenCredential
16
+ from azure.core.credentials_async import AsyncTokenCredential
11
17
 
12
18
  from azure.ai.evaluation._exceptions import ErrorTarget
13
- from azure.ai.evaluation._constants import DefaultOpenEncoding
19
+ from azure.ai.evaluation._constants import DefaultOpenEncoding, TokenScope
14
20
  from azure.ai.evaluation._legacy.prompty._exceptions import (
15
21
  InvalidInputError,
16
22
  PromptyException,
17
23
  MissingRequiredInputError,
18
24
  NotSupportedError,
25
+ WrappedOpenAIError,
19
26
  )
20
27
  from azure.ai.evaluation._legacy.prompty._connection import AzureOpenAIConnection, Connection, OpenAIConnection
21
28
  from azure.ai.evaluation._legacy.prompty._yaml_utils import load_yaml_string
@@ -25,10 +32,14 @@ from azure.ai.evaluation._legacy.prompty._utils import (
25
32
  OpenAIChatResponseType,
26
33
  build_messages,
27
34
  format_llm_response,
35
+ openai_error_retryable,
28
36
  prepare_open_ai_request_params,
29
37
  resolve_references,
30
38
  update_dict_recursively,
31
39
  )
40
+ from azure.ai.evaluation._constants import DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
41
+ from azure.ai.evaluation._legacy._common._logging import get_logger
42
+ from azure.ai.evaluation._legacy._common._async_token_provider import AsyncAzureTokenProvider
32
43
 
33
44
 
34
45
  PROMPTY_EXTENSION: Final[str] = ".prompty"
@@ -124,10 +135,24 @@ class AsyncPrompty:
124
135
  def __init__(
125
136
  self,
126
137
  path: Union[str, PathLike],
138
+ *,
139
+ logger: Optional[Logger] = None,
140
+ token_credential: Optional[Union[TokenCredential, AsyncTokenCredential]] = None,
141
+ is_reasoning_model: bool = False,
127
142
  **kwargs: Any,
128
143
  ):
129
144
  path = Path(path)
130
145
  configs, self._template = self._parse_prompty(path)
146
+
147
+ if is_reasoning_model:
148
+ parameters = configs.get("model", {}).get("parameters", {})
149
+ if "max_tokens" in parameters:
150
+ parameters.pop("max_tokens", None)
151
+ parameters["max_completion_tokens"] = DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
152
+ # Remove unsupported parameters for reasoning models
153
+ for key in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
154
+ parameters.pop(key, None)
155
+
131
156
  configs = resolve_references(configs, base_path=path.parent)
132
157
  configs = update_dict_recursively(configs, resolve_references(kwargs, base_path=path.parent))
133
158
 
@@ -142,6 +167,9 @@ class AsyncPrompty:
142
167
  self._inputs: Dict[str, Any] = configs.get("inputs", {})
143
168
  self._outputs: Dict[str, Any] = configs.get("outputs", {})
144
169
  self._name: str = configs.get("name", path.stem)
170
+ self._logger = logger or get_logger(__name__)
171
+ self._token_credential: Union[TokenCredential, AsyncTokenCredential] = \
172
+ token_credential or AsyncAzureTokenProvider()
145
173
 
146
174
  @property
147
175
  def path(self) -> Path:
@@ -234,9 +262,6 @@ class AsyncPrompty:
234
262
 
235
263
  return resolved_inputs
236
264
 
237
- # TODO ralphe: error handling
238
- # @trace
239
- # @handle_openai_error()
240
265
  async def __call__( # pylint: disable=docstring-keyword-should-match-keyword-only
241
266
  self,
242
267
  **kwargs: Any,
@@ -257,7 +282,7 @@ class AsyncPrompty:
257
282
  messages = build_messages(prompt=self._template, working_dir=self.path.parent, **inputs)
258
283
  params = prepare_open_ai_request_params(self._model, messages)
259
284
 
260
- timeout: Union[NotGiven, float] = NotGiven()
285
+ timeout: Optional[float] = None
261
286
  if timeout_val := cast(Any, kwargs.get("timeout", None)):
262
287
  timeout = float(timeout_val)
263
288
 
@@ -273,6 +298,9 @@ class AsyncPrompty:
273
298
  azure_deployment=connection.azure_deployment,
274
299
  api_version=connection.api_version,
275
300
  max_retries=max_retries,
301
+ azure_ad_token_provider=(self.get_token_provider(self._token_credential)
302
+ if not connection.api_key
303
+ else None),
276
304
  )
277
305
  elif isinstance(connection, OpenAIConnection):
278
306
  api_client = AsyncOpenAI(
@@ -286,8 +314,10 @@ class AsyncPrompty:
286
314
  f"'{type(connection).__name__}' is not a supported connection type.", target=ErrorTarget.EVAL_RUN
287
315
  )
288
316
 
289
- response: OpenAIChatResponseType = await api_client.with_options(timeout=timeout).chat.completions.create(
290
- **params
317
+ response: OpenAIChatResponseType = await self._send_with_retries(
318
+ api_client=api_client,
319
+ params=params,
320
+ timeout=timeout,
291
321
  )
292
322
 
293
323
  return await format_llm_response(
@@ -311,3 +341,83 @@ class AsyncPrompty:
311
341
  inputs = self._resolve_inputs(kwargs)
312
342
  messages = build_messages(prompt=self._template, working_dir=self.path.parent, **inputs)
313
343
  return messages
344
+
345
async def _send_with_retries(
    self,
    api_client: Union[AsyncAzureOpenAI, AsyncOpenAI],
    params: Mapping[str, Any],
    timeout: Optional[float],
    max_retries: int = 10,
    max_entity_retries: int = 3,
) -> OpenAIChatResponseType:
    """Issue the chat completion request, retrying on retryable OpenAI errors.

    :param Union[AsyncAzureOpenAI, AsyncOpenAI] api_client: The OpenAI client.
    :param Mapping[str, Any] params: The request parameters.
    :param Optional[float] timeout: The timeout for the request. A falsy value leaves the client's
        timeout unset.
    :param int max_retries: The maximum number of retries.
    :param int max_entity_retries: The maximum number of retries for entity errors.
    :return: The response from OpenAI.
    :rtype: OpenAIChatResponseType
    :raises WrappedOpenAIError: When the request fails and no further retry is allowed.
    """

    client_name: str = api_client.__class__.__name__
    configured_client: Union[AsyncAzureOpenAI, AsyncOpenAI] = api_client.with_options(
        timeout=timeout or NotGiven()
    )

    # Single-element list so openai_error_retryable can update the counter in place.
    entity_attempts: List[int] = [0]
    attempt: int = 0
    wait_seconds: Optional[float] = None

    while True:
        # Wait out the delay computed by the previous failed attempt, if any.
        if wait_seconds:
            await asyncio.sleep(wait_seconds)

        try:
            return await configured_client.chat.completions.create(**params)
        except OpenAIError as error:
            can_retry: bool = False
            if attempt < max_retries:
                can_retry, wait_seconds = openai_error_retryable(
                    error, attempt, entity_attempts, max_entity_retries
                )

            if not can_retry:
                self._logger.exception(
                    "[%d/%d] %s request failed. %s: %s",
                    attempt,
                    max_retries,
                    client_name,
                    type(error).__name__,
                    str(error),
                )
                raise WrappedOpenAIError(error=error) from error

            self._logger.warning(
                "[%d/%d] %s request failed. %s: %s. Retrying in %f seconds.",
                attempt,
                max_retries,
                client_name,
                type(error).__name__,
                str(error),
                wait_seconds or 0.0,
                exc_info=True,
            )

        attempt += 1
408
+
409
@staticmethod
def get_token_provider(cred: Union[TokenCredential, AsyncTokenCredential]) -> AsyncAzureADTokenProvider:
    """Build an async token provider backed by the given credential.

    :param Union[TokenCredential, AsyncTokenCredential] cred: The Azure authentication credential.
    :return: An async callable that requests a token for the
        ``TokenScope.COGNITIVE_SERVICES_MANAGEMENT`` scope and returns its token string.
    :rtype: AsyncAzureADTokenProvider
    """

    async def _provide_token() -> str:
        # get_token may hand back the token directly or an awaitable that resolves to it.
        result = cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT)
        access_token = (await result) if isinstance(result, Awaitable) else result
        return access_token.token

    return _provide_token
@@ -2,12 +2,15 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
+ # cspell:ignore apng, retriable
6
+
5
7
  import copy
6
- from dataclasses import dataclass, is_dataclass, fields
7
8
  import os
8
9
  import re
9
10
  import json
10
11
  import base64
12
+ from dataclasses import dataclass, is_dataclass, fields
13
+ from logging import Logger
11
14
  from pathlib import Path
12
15
  from typing import (
13
16
  Any,
@@ -30,6 +33,7 @@ from typing import (
30
33
  from jinja2 import Template
31
34
  from openai import AsyncStream
32
35
  from openai.types.chat import ChatCompletion, ChatCompletionChunk
36
+ from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAIError
33
37
 
34
38
  from azure.ai.evaluation._constants import DefaultOpenEncoding
35
39
  from azure.ai.evaluation._legacy.prompty._exceptions import (
@@ -217,7 +221,7 @@ DEFAULT_IMAGE_MIME_TYPE: Final[str] = "image/*"
217
221
  """The mime type to use when we don't know the image type"""
218
222
 
219
223
  FILE_EXT_TO_MIME: Final[Mapping[str, str]] = {
220
- ".apng": "image/apng", # cspell:ignore apng
224
+ ".apng": "image/apng",
221
225
  ".avif": "image/avif",
222
226
  ".bmp": "image/bmp",
223
227
  ".gif": "image/gif",
@@ -542,4 +546,70 @@ async def format_llm_response(
542
546
  return result
543
547
 
544
548
 
549
def openai_error_retryable(
    error: OpenAIError, retry: int, entity_retry: List[int], max_entity_retries: int
) -> Tuple[bool, float]:
    """
    Determines if an OpenAI error is retryable, and the delay to wait before the next attempt.

    :param OpenAIError error: The error to handle
    :param int retry: The current retry count (0 means we're on the first attempt and no retries have been made)
    :param List[int] entity_retry: The current retry count for the unprocessable entity failures. This should be a
        list containing only 1 element to mimic pass by reference semantics. A value of 0 means we're on the
        first attempt and no retries have been made.
    :param int max_entity_retries: The maximum number of retries to make for unprocessable entity failures
    :return: A tuple containing whether the error is retryable and the delay in seconds to use. The delay is the
        positive numeric ``Retry-After`` value supplied by the service when there is one, otherwise an
        exponential backoff capped at 60 seconds. A delay is returned even when the error is not retryable.
    :rtype: Tuple[bool, float]
    """

    # Using https://platform.openai.com/docs/guides/error-codes/api-errors#python-library-error-types as a reference

    should_retry: bool
    delay: Optional[float] = None

    if isinstance(error, APIConnectionError):
        retriable_error_messages: Sequence[str] = [
            "connection aborted",
            # issue 2296
            "server disconnected without sending a response",
        ]
        # Match the known fragments as substrings: real messages carry extra detail (punctuation, the
        # underlying exception repr), so an exact equality test would never match them.
        error_texts = (str(error).lower(), str(error.__cause__).lower())
        should_retry = isinstance(error, APITimeoutError) or any(  # APITimeoutError subclasses APIConnectionError
            message in text for text in error_texts for message in retriable_error_messages
        )
    elif isinstance(error, APIStatusError):
        status_code: int = error.response.status_code
        if status_code == 422:
            # As per the original legacy code, UnprocessableEntityError (HTTP 422) should be handled differently
            # with a smaller retry count, as retrying more may not be beneficial.
            should_retry = entity_retry[0] < max_entity_retries
            entity_retry[0] += 1
        elif status_code == 429:
            # Two types, one is you are throttled and so should retry after a delay, the other is you have exceeded
            # your quota and should not retry. error.type may be None, and is compared case-insensitively.
            should_retry = (error.type or "").lower() != "insufficient_quota"
        else:
            should_retry = status_code >= 500

        # Use what the service tells us to use for the delay if it's provided
        if should_retry:
            delay_str = error.response.headers.get("Retry-After", None)
            if delay_str is not None:
                try:
                    delay = float(delay_str)
                except (TypeError, ValueError):
                    # Retry-After may also be an HTTP-date; fall back to the backoff below in that case.
                    delay = None
                # Ignore zero, negative and NaN values so the backoff below applies instead.
                if delay is not None and not delay > 0:
                    delay = None
    else:
        should_retry = False

    # Use exponential backoff for retries if the service doesn't provide a delay
    if not delay:
        delay = float(min(60, 2 + 2**retry))

    return (should_retry, delay)
613
+
614
+
545
615
  # endregion
@@ -6,9 +6,10 @@ from enum import Enum
6
6
  import os
7
7
  import inspect
8
8
  import logging
9
+ import asyncio
9
10
  from datetime import datetime
10
11
  from azure.ai.evaluation._common._experimental import experimental
11
- from typing import Any, Callable, Dict, List, Optional, Union, cast
12
+ from typing import Any, Callable, Dict, List, Optional, Union, cast, Coroutine, TypeVar, Awaitable
12
13
  from azure.ai.evaluation._common.math import list_mean_nan_safe
13
14
  from azure.ai.evaluation._constants import CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT
14
15
  from azure.ai.evaluation._evaluators import (
@@ -20,6 +21,8 @@ from azure.ai.evaluation._evaluators import (
20
21
  _fluency,
21
22
  _xpia,
22
23
  _coherence,
24
+ _code_vulnerability,
25
+ _ungrounded_attributes,
23
26
  )
24
27
  from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
25
28
  from azure.ai.evaluation._evaluate import _evaluate
@@ -31,7 +34,7 @@ from azure.ai.evaluation.simulator import (
31
34
  AdversarialScenario,
32
35
  AdversarialScenarioJailbreak,
33
36
  IndirectAttackSimulator,
34
- DirectAttackSimulator ,
37
+ DirectAttackSimulator,
35
38
  )
36
39
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
37
40
  from azure.ai.evaluation.simulator._utils import JsonLineList
@@ -71,6 +74,7 @@ class _SafetyEvaluator(Enum):
71
74
  """
72
75
 
73
76
  CONTENT_SAFETY = "content_safety"
77
+ CODE_VULNERABILITY = "code_vulnerability"
74
78
  GROUNDEDNESS = "groundedness"
75
79
  PROTECTED_MATERIAL = "protected_material"
76
80
  RELEVANCE = "relevance"
@@ -80,21 +84,22 @@ class _SafetyEvaluator(Enum):
80
84
  INDIRECT_ATTACK = "indirect_attack"
81
85
  DIRECT_ATTACK = "direct_attack"
82
86
  ECI = "eci"
87
+ UNGROUNDED_ATTRIBUTES = "ungrounded_attributes"
83
88
 
84
89
 
85
90
  @experimental
86
91
  class _SafetyEvaluation:
87
92
  def __init__(
88
93
  self,
89
- azure_ai_project: dict,
94
+ azure_ai_project: Union[str, dict],
90
95
  credential: TokenCredential,
91
96
  model_config: Optional[Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]] = None,
92
97
  ):
93
98
  """
94
99
  Initializes a SafetyEvaluation object.
95
100
 
96
- :param azure_ai_project: A dictionary defining the Azure AI project. Required keys are 'subscription_id', 'resource_group_name', and 'project_name'.
97
- :type azure_ai_project: Dict[str, str]
101
+ :param azure_ai_project: A string or dictionary defining the Azure AI project. Required keys are 'subscription_id', 'resource_group_name', and 'project_name'.
102
+ :type azure_ai_project: Union[str, Dict[str, str]]
98
103
  :param credential: The credential for connecting to Azure AI project.
99
104
  :type credential: ~azure.core.credentials.TokenCredential
100
105
  :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration.
@@ -106,8 +111,7 @@ class _SafetyEvaluation:
106
111
  self.model_config = model_config
107
112
  else:
108
113
  self.model_config = None
109
- validate_azure_ai_project(azure_ai_project)
110
- self.azure_ai_project = AzureAIProject(**azure_ai_project)
114
+ self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
111
115
  self.credential = credential
112
116
  self.logger = _setup_logger()
113
117
 
@@ -192,10 +196,17 @@ class _SafetyEvaluation:
192
196
  context = latest_message.get("context", None)
193
197
  latest_context = None
194
198
  try:
199
+ is_async = self._is_async_function(target)
195
200
  if self._check_target_returns_context(target):
196
- response, latest_context = target(query=application_input)
201
+ if is_async:
202
+ response, latest_context = await target(query=application_input)
203
+ else:
204
+ response, latest_context = target(query=application_input)
197
205
  else:
198
- response = target(query=application_input)
206
+ if is_async:
207
+ response = await target(query=application_input)
208
+ else:
209
+ response = target(query=application_input)
199
210
  except Exception as e:
200
211
  response = f"Something went wrong {e!s}"
201
212
 
@@ -372,6 +383,10 @@ class _SafetyEvaluation:
372
383
  )
373
384
  if evaluator == _SafetyEvaluator.ECI:
374
385
  return _UnstableAdversarialScenario.ECI
386
+ if evaluator == _SafetyEvaluator.CODE_VULNERABILITY:
387
+ return AdversarialScenario.ADVERSARIAL_CODE_VULNERABILITY
388
+ if evaluator == _SafetyEvaluator.UNGROUNDED_ATTRIBUTES:
389
+ return AdversarialScenario.ADVERSARIAL_UNGROUNDED_ATTRIBUTES
375
390
  if evaluator in [
376
391
  _SafetyEvaluator.GROUNDEDNESS,
377
392
  _SafetyEvaluator.RELEVANCE,
@@ -453,6 +468,14 @@ class _SafetyEvaluation:
453
468
  evaluators_dict["eci"] = ECIEvaluator(
454
469
  azure_ai_project=self.azure_ai_project, credential=self.credential
455
470
  )
471
+ elif evaluator == _SafetyEvaluator.CODE_VULNERABILITY:
472
+ evaluators_dict["code_vulnerability"] = _code_vulnerability.CodeVulnerabilityEvaluator(
473
+ azure_ai_project=self.azure_ai_project, credential=self.credential
474
+ )
475
+ elif evaluator == _SafetyEvaluator.UNGROUNDED_ATTRIBUTES:
476
+ evaluators_dict["ungrounded_attributes"] = _ungrounded_attributes.UngroundedAttributesEvaluator(
477
+ azure_ai_project=self.azure_ai_project, credential=self.credential
478
+ )
456
479
  else:
457
480
  msg = (
458
481
  f"Invalid evaluator: {evaluator}. Supported evaluators are: {_SafetyEvaluator.__members__.values()}"
@@ -465,7 +488,7 @@ class _SafetyEvaluation:
465
488
  blame=ErrorBlame.USER_ERROR,
466
489
  )
467
490
  return evaluators_dict
468
-
491
+
469
492
  @staticmethod
470
493
  def _check_target_returns_context(target: Callable) -> bool:
471
494
  """
@@ -478,6 +501,15 @@ class _SafetyEvaluation:
478
501
  ret_type = sig.return_annotation
479
502
  if ret_type == inspect.Signature.empty:
480
503
  return False
504
+
505
+ # Check for Coroutine/Awaitable return types for async functions
506
+ origin = getattr(ret_type, "__origin__", None)
507
+ if origin is not None and (origin is Coroutine or origin is Awaitable):
508
+ args = getattr(ret_type, "__args__", None)
509
+ if args and len(args) > 0:
510
+ # For async functions, check the actual return type inside the Coroutine
511
+ ret_type = args[-1]
512
+
481
513
  if ret_type is tuple:
482
514
  return True
483
515
  return False
@@ -494,13 +526,33 @@ class _SafetyEvaluation:
494
526
  ret_type = sig.return_annotation
495
527
  if ret_type == inspect.Signature.empty:
496
528
  return False
529
+
530
+ # Check for Coroutine/Awaitable return types for async functions
531
+ origin = getattr(ret_type, "__origin__", None)
532
+ if origin is not None and (origin is Coroutine or origin is Awaitable):
533
+ args = getattr(ret_type, "__args__", None)
534
+ if args and len(args) > 0:
535
+ # For async functions, check the actual return type inside the Coroutine
536
+ ret_type = args[-1]
537
+
497
538
  if ret_type is str:
498
539
  return True
499
540
  return False
500
541
 
501
-
502
542
  @staticmethod
503
- def _check_target_is_callback(target:Callable) -> bool:
543
def _is_async_function(target: Callable) -> bool:
    """
    Checks if calling the target produces a coroutine that must be awaited.

    Coroutine functions are detected directly. Callable objects whose ``__call__`` method is a coroutine
    function are also treated as async, because calling them returns a coroutine as well.

    :param target: The target function to check.
    :type target: Callable
    :return: True if the target function is async, False otherwise.
    :rtype: bool
    """
    if asyncio.iscoroutinefunction(target):
        return True
    # Calling a class constructs an instance rather than invoking __call__, so classes are never async targets.
    if inspect.isclass(target):
        return False
    return inspect.iscoroutinefunction(getattr(target, "__call__", None))
553
+
554
+ @staticmethod
555
+ def _check_target_is_callback(target: Callable) -> bool:
504
556
  sig = inspect.signature(target)
505
557
  param_names = list(sig.parameters.keys())
506
558
  return 'messages' in param_names and 'stream' in param_names and 'session_state' in param_names and 'context' in param_names
@@ -560,7 +612,28 @@ class _SafetyEvaluation:
560
612
  category=ErrorCategory.INVALID_VALUE,
561
613
  blame=ErrorBlame.USER_ERROR,
562
614
  )
563
-
615
+
616
+ if _SafetyEvaluator.CODE_VULNERABILITY in evaluators and num_turns > 1:
617
+ self.logger.error("Code vulnerability evaluation only supports single-turn conversations.")
618
+ msg = "Code vulnerability evaluation only supports single-turn conversations."
619
+ raise EvaluationException(
620
+ message=msg,
621
+ internal_message=msg,
622
+ target=ErrorTarget.UNKNOWN,
623
+ category=ErrorCategory.INVALID_VALUE,
624
+ blame=ErrorBlame.USER_ERROR,
625
+ )
626
+ if _SafetyEvaluator.UNGROUNDED_ATTRIBUTES in evaluators and num_turns > 1:
627
+ self.logger.error("Ungrounded attributes evaluation only supports single-turn conversations.")
628
+ msg = "Ungrounded attributes evaluation only supports single-turn conversations."
629
+ raise EvaluationException(
630
+ message=msg,
631
+ internal_message=msg,
632
+ target=ErrorTarget.UNKNOWN,
633
+ category=ErrorCategory.INVALID_VALUE,
634
+ blame=ErrorBlame.USER_ERROR,
635
+ )
636
+
564
637
  if _SafetyEvaluator.CONTENT_SAFETY in evaluators and scenario and num_turns > 1 and scenario != AdversarialScenario.ADVERSARIAL_CONVERSATION:
565
638
  self.logger.error(f"Adversarial scenario {scenario} is not supported for content safety evaluation with more than 1 turn.")
566
639
  msg = f"Adversarial scenario {scenario} is not supported for content safety evaluation with more than 1 turn."
@@ -630,7 +703,7 @@ class _SafetyEvaluation:
630
703
 
631
704
  async def __call__(
632
705
  self,
633
- target: Union[Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
706
+ target: Union[Callable, Awaitable[Any], AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
634
707
  evaluators: List[_SafetyEvaluator] = [],
635
708
  evaluation_name: Optional[str] = None,
636
709
  num_turns : int = 1,
@@ -644,12 +717,12 @@ class _SafetyEvaluation:
644
717
  jailbreak_data_path: Optional[Union[str, os.PathLike]] = None,
645
718
  output_path: Optional[Union[str, os.PathLike]] = None,
646
719
  data_paths: Optional[Union[Dict[str, str], Dict[str, Union[str,os.PathLike]]]] = None
647
- ) -> Union[Dict[str, EvaluationResult], Dict[str, str], Dict[str, Union[str,os.PathLike]]]:
720
+ ) -> Union[Dict[str, EvaluationResult], Dict[str, str], Dict[str, Union[str,os.PathLike]]]:
648
721
  '''
649
722
  Evaluates the target function based on the provided parameters.
650
723
 
651
- :param target: The target function to call during the evaluation.
652
- :type target: Callable
724
+ :param target: The target function to call during the evaluation. This can be a synchronous or asynchronous function.
725
+ :type target: Union[Callable, Awaitable[Any], AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
653
726
  :param evaluators: A list of SafetyEvaluator.
654
727
  :type evaluators: List[_SafetyEvaluator]
655
728
  :param evaluation_name: The display name of the evaluation.
@@ -3,4 +3,4 @@
3
3
  # ---------------------------------------------------------
4
4
  # represents upcoming version
5
5
 
6
- VERSION = "1.5.0"
6
+ VERSION = "1.6.0"
@@ -42,4 +42,4 @@ class AttackStrategy(Enum):
42
42
  raise ValueError("All items must be instances of AttackStrategy")
43
43
  if len(items) > 2:
44
44
  raise ValueError("Composed strategies must have at most 2 items")
45
- return items
45
+ return items