azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation has been flagged as possibly problematic.

Files changed (58)
  1. azure/ai/evaluation/__init__.py +1 -5
  2. azure/ai/evaluation/_common/rai_service.py +4 -4
  3. azure/ai/evaluation/_common/utils.py +19 -19
  4. azure/ai/evaluation/_constants.py +9 -0
  5. azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
  6. azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
  7. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
  8. azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
  9. azure/ai/evaluation/_evaluate/_evaluate.py +35 -28
  10. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
  11. azure/ai/evaluation/_evaluate/_utils.py +29 -22
  12. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  13. azure/ai/evaluation/_evaluators/_chat/_chat.py +16 -9
  14. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +4 -10
  15. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -10
  16. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
  17. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +1 -2
  18. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
  19. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +1 -1
  20. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +1 -1
  21. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +1 -1
  22. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +1 -1
  23. azure/ai/evaluation/_evaluators/_eci/_eci.py +2 -2
  24. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
  25. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +5 -10
  26. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  27. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +5 -10
  28. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
  29. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +2 -2
  30. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +2 -2
  31. azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
  32. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +5 -10
  33. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
  34. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -10
  35. azure/ai/evaluation/_evaluators/_xpia/xpia.py +1 -2
  36. azure/ai/evaluation/_http_utils.py +3 -3
  37. azure/ai/evaluation/_version.py +1 -1
  38. azure/ai/evaluation/simulator/__init__.py +1 -1
  39. azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
  40. azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
  41. azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
  42. azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
  43. azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
  44. azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
  45. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
  46. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
  47. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
  48. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
  49. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
  50. azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
  51. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  52. azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +147 -80
  53. azure/ai/evaluation/simulator/_tracing.py +21 -24
  54. azure/ai/evaluation/simulator/_utils.py +4 -1
  55. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/METADATA +86 -14
  56. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/RECORD +58 -56
  57. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +0 -0
  58. {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0

azure/ai/evaluation/_evaluate/_telemetry/__init__.py
@@ -6,21 +6,24 @@ import functools
  import inspect
  import json
  import logging
- from typing import Callable, Dict
+ from typing import Callable, Dict, TypeVar

  import pandas as pd
-
  from promptflow._sdk.entities._flows import FlexFlow as flex_flow
  from promptflow._sdk.entities._flows import Prompty as prompty_sdk
  from promptflow._sdk.entities._flows.dag import Flow as dag_flow
  from promptflow.client import PFClient
  from promptflow.core import Prompty as prompty_core
+ from typing_extensions import ParamSpec

  from ..._user_agent import USER_AGENT
  from .._utils import _trace_destination_from_project_scope

  LOGGER = logging.getLogger(__name__)

+ P = ParamSpec("P")
+ R = TypeVar("R")
+

  def _get_evaluator_type(evaluator: Dict[str, Callable]):
  """
@@ -82,7 +85,7 @@ def _get_evaluator_properties(evaluator, evaluator_name):
  name = str(evaluator)
  pf_type = "Unknown"
  except Exception as e: # pylint: disable=broad-exception-caught
- LOGGER.debug(f"Failed to get evaluator properties: {e}")
+ LOGGER.debug("Failed to get evaluator properties: %s", e)
  name = str(evaluator)
  pf_type = "Unknown"

@@ -95,15 +98,17 @@ def _get_evaluator_properties(evaluator, evaluator_name):


  # cspell:ignore isna
- def log_evaluate_activity(func) -> None:
+ def log_evaluate_activity(func: Callable[P, R]) -> Callable[P, R]:
  """Decorator to log evaluate activity

  :param func: The function to be decorated
  :type func: Callable
+ :returns: The decorated function
+ :rtype: Callable[P, R]
  """

  @functools.wraps(func)
- def wrapper(*args, **kwargs) -> Callable:
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
  from promptflow._sdk._telemetry import ActivityType, log_activity
  from promptflow._sdk._telemetry.telemetry import get_telemetry_logger

@@ -119,7 +124,7 @@ def log_evaluate_activity(func) -> None:
  user_agent=USER_AGENT,
  )

- track_in_cloud = bool(pf_client._config.get_trace_destination())
+ track_in_cloud = bool(pf_client._config.get_trace_destination()) # pylint: disable=protected-access
  evaluate_target = bool(kwargs.get("target", None))
  evaluator_config = bool(kwargs.get("evaluator_config", None))
  custom_dimensions = {
@@ -154,7 +159,7 @@ def log_evaluate_activity(func) -> None:
  evaluator_info["failed_rows"] = failed_rows
  evaluator_info["total_rows"] = total_rows
  except Exception as e: # pylint: disable=broad-exception-caught
- LOGGER.debug(f"Failed to collect evaluate failed row info for {evaluator_name}: {e}")
+ LOGGER.debug("Failed to collect evaluate failed row info for %s: %s", evaluator_name, e)
  evaluators_info.append(evaluator_info)

  custom_dimensions = {"evaluators_info": json.dumps(evaluators_info)}
@@ -167,7 +172,7 @@ def log_evaluate_activity(func) -> None:
  ):
  pass
  except Exception as e: # pylint: disable=broad-exception-caught
- LOGGER.debug(f"Failed to collect evaluate usage info: {e}")
+ LOGGER.debug("Failed to collect evaluate usage info: %s", e)

  return result
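
The typing change in log_evaluate_activity swaps the bare Callable annotations for ParamSpec/TypeVar, so the decorator no longer erases the wrapped function's signature (the old annotation even declared it returned None). A minimal, self-contained sketch of the same pattern; log_activity and add below are illustrative names, not from the package:

    import functools
    import logging
    from typing import Callable, TypeVar

    from typing_extensions import ParamSpec  # typing.ParamSpec on Python 3.10+

    P = ParamSpec("P")
    R = TypeVar("R")

    LOGGER = logging.getLogger(__name__)


    def log_activity(func: Callable[P, R]) -> Callable[P, R]:
        """Wrap func without erasing its parameter and return types."""

        @functools.wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            # Lazy %-style formatting, matching the LOGGER.debug changes in this diff.
            LOGGER.debug("calling %s", func.__name__)
            return func(*args, **kwargs)

        return wrapper


    @log_activity
    def add(x: int, y: int) -> int:
        return x + y

With Callable[P, R] preserved, a type checker still flags a bad call such as add("1", 2) on the decorated function.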

azure/ai/evaluation/_evaluate/_utils.py
@@ -8,12 +8,13 @@ import re
  import tempfile
  from collections import namedtuple
  from pathlib import Path
+ from typing import Dict

  import pandas as pd

- from azure.ai.evaluation._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, Prefixes
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+ from azure.ai.evaluation._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, DefaultOpenEncoding, Prefixes
  from azure.ai.evaluation._evaluate._eval_run import EvalRun
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException

  LOGGER = logging.getLogger(__name__)

@@ -33,16 +34,16 @@ def extract_workspace_triad_from_trace_provider(trace_provider: str): # pylint:
  match = re.match(AZURE_WORKSPACE_REGEX_FORMAT, trace_provider)
  if not match or len(match.groups()) != 5:
  raise EvaluationException(
- message="Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
- "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
- f"workspaces/<workspace_name>, got {trace_provider}",
- internal_message="Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
- "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
- "workspaces/<workspace_name>,",
- target=ErrorTarget.UNKNOWN,
- category=ErrorCategory.INVALID_VALUE,
- blame=ErrorBlame.UNKNOWN,
- )
+ message="Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
+ "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
+ f"workspaces/<workspace_name>, got {trace_provider}",
+ internal_message="Malformed trace provider string, expected azureml://subscriptions/<subscription_id>/"
+ "resourceGroups/<resource_group>/providers/Microsoft.MachineLearningServices/"
+ "workspaces/<workspace_name>,",
+ target=ErrorTarget.UNKNOWN,
+ category=ErrorCategory.INVALID_VALUE,
+ blame=ErrorBlame.UNKNOWN,
+ )
  subscription_id = match.group(1)
  resource_group_name = match.group(3)
  workspace_name = match.group(5)
@@ -50,7 +51,7 @@ def extract_workspace_triad_from_trace_provider(trace_provider: str): # pylint:


  def load_jsonl(path):
- with open(path, "r", encoding="utf-8") as f:
+ with open(path, "r", encoding=DefaultOpenEncoding.READ) as f:
  return [json.loads(line) for line in f.readlines()]


@@ -99,7 +100,7 @@ def _log_metrics_and_instance_results(
  with tempfile.TemporaryDirectory() as tmpdir:
  tmp_path = os.path.join(tmpdir, artifact_name)

- with open(tmp_path, "w", encoding="utf-8") as f:
+ with open(tmp_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
  f.write(instance_results.to_json(orient="records", lines=True))

  ev_run.log_artifact(tmpdir, artifact_name)
@@ -155,11 +156,13 @@ def _write_output(path, data_dict):
  if os.path.isdir(path):
  p = p / DEFAULT_EVALUATION_RESULTS_FILE_NAME

- with open(p, "w") as f:
+ with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
  json.dump(data_dict, f)


- def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace: bool = False) -> pd.DataFrame:
+ def _apply_column_mapping(
+ source_df: pd.DataFrame, mapping_config: Dict[str, str], inplace: bool = False
+ ) -> pd.DataFrame:
  """
  Apply column mapping to source_df based on mapping_config.

@@ -167,10 +170,11 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace
  :param source_df: the data frame to be changed.
  :type source_df: pd.DataFrame
  :param mapping_config: The configuration, containing column mapping.
- :type mapping_config: dict.
+ :type mapping_config: Dict[str, str].
  :param inplace: If true, the source_df will be changed inplace.
  :type inplace: bool
  :return: The modified data frame.
+ :rtype: pd.DataFrame
  """
  result_df = source_df

@@ -211,19 +215,22 @@ def _has_aggregator(evaluator):
  return hasattr(evaluator, "__aggregate__")


- def get_int_env_var(env_var_name, default_value=None):
+ def get_int_env_var(env_var_name: str, default_value: int) -> int:
  """
- The function `get_int_env_var` retrieves an integer environment variable value, with an optional
+ The function `get_int_env_var` retrieves an integer environment variable value, with a
  default value if the variable is not set or cannot be converted to an integer.

  :param env_var_name: The name of the environment variable you want to retrieve the value of
+ :type env_var_name: str
  :param default_value: The default value is the value that will be returned if the environment
- variable is not found or if it cannot be converted to an integer
+ variable is not found or if it cannot be converted to an integer
+ :type default_value: int
  :return: an integer value.
+ :rtype: int
  """
  try:
- return int(os.environ.get(env_var_name, default_value))
- except Exception:
+ return int(os.environ[env_var_name])
+ except (ValueError, KeyError):
  return default_value

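
Two behavior notes on this file: the open() calls now take their encoding from the new DefaultOpenEncoding constants added to _constants.py instead of hard-coding "utf-8", and get_int_env_var now requires an int default, parses only the environment value itself, and falls back to the default only on a missing variable (KeyError) or an unparseable one (ValueError). A standalone sketch of the new get_int_env_var behavior; the function body mirrors the diff, while the environment variable name used in the usage lines is made up for illustration:

    import os


    def get_int_env_var(env_var_name: str, default_value: int) -> int:
        """Return the environment variable parsed as int, or default_value if unset or unparseable."""
        try:
            return int(os.environ[env_var_name])
        except (ValueError, KeyError):
            return default_value


    os.environ["EVAL_BATCH_SIZE"] = "16"                   # hypothetical variable name
    assert get_int_env_var("EVAL_BATCH_SIZE", 4) == 16
    assert get_int_env_var("EVAL_NOT_SET", 4) == 4         # missing -> default
    os.environ["EVAL_BATCH_SIZE"] = "sixteen"
    assert get_int_env_var("EVAL_BATCH_SIZE", 4) == 4      # unparseable -> default

Under the old int(os.environ.get(name, default)) form with a broad except, a None default silently produced None; the new signature rules that out.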

azure/ai/evaluation/_evaluators/_bleu/_bleu.py
@@ -2,8 +2,8 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
-
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.utils import nltk_tokenize


azure/ai/evaluation/_evaluators/_chat/_chat.py
@@ -4,19 +4,18 @@
  import json
  import logging
  from concurrent.futures import as_completed
- from typing import Dict, List, Union
+ from typing import Dict, List

  import numpy as np
-
  from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor

+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+
  from .._coherence import CoherenceEvaluator
  from .._fluency import FluencyEvaluator
  from .._groundedness import GroundednessEvaluator
- from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
  from .._relevance import RelevanceEvaluator
  from .retrieval import RetrievalChatEvaluator
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget

  logger = logging.getLogger(__name__)

@@ -203,7 +202,7 @@ class ChatEvaluator:
  return score
  except Exception as e: # pylint: disable=broad-exception-caught
  logger.warning(
- f"Evaluator {evaluator.__class__.__name__} failed for turn {turn_num + 1} with exception: {e}"
+ "Evaluator %s failed for turn %s with exception: %s", evaluator.__class__.__name__, turn_num + 1, e
  )
  return {}

@@ -264,7 +263,10 @@ class ChatEvaluator:
  )

  if "role" not in turn or "content" not in turn:
- msg = f"Each turn in 'conversation' must have 'role' and 'content' keys. Turn number: {one_based_turn_num}"
+ msg = (
+ "Each turn in 'conversation' must have 'role' and 'content' keys. "
+ + f"Turn number: {one_based_turn_num}"
+ )
  raise EvaluationException(
  message=msg,
  internal_message=msg,
@@ -272,7 +274,7 @@ class ChatEvaluator:
  category=ErrorCategory.INVALID_VALUE,
  blame=ErrorBlame.USER_ERROR,
  )
-
+
  if turn["role"] != expected_role:
  msg = f"Expected role {expected_role} but got {turn['role']}. Turn number: {one_based_turn_num}"
  raise EvaluationException(
@@ -305,7 +307,9 @@ class ChatEvaluator:
  )

  if "citations" not in turn["context"]:
- msg = f"Context in each assistant's turn must have 'citations' key. Turn number: {one_based_turn_num}"
+ msg = (
+ f"Context in each assistant's turn must have 'citations' key. Turn number: {one_based_turn_num}"
+ )
  raise EvaluationException(
  message=msg,
  internal_message=msg,
@@ -326,7 +330,10 @@ class ChatEvaluator:

  for citation_num, citation in enumerate(turn["context"]["citations"]):
  if not isinstance(citation, dict):
- msg = f"Each citation in 'citations' must be a dictionary. Turn number: {one_based_turn_num}, Citation number: {citation_num + 1}"
+ msg = (
+ "Each citation in 'citations' must be a dictionary. "
+ + f"Turn number: {one_based_turn_num}, Citation number: {citation_num + 1}"
+ )
  raise EvaluationException(
  message=msg,
  internal_message=msg,
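
The reworked validation messages above spell out the conversation shape ChatEvaluator enforces: turns alternate between the expected roles, every turn needs 'role' and 'content', and each assistant turn's 'context' must carry 'citations' as a list of dictionaries. A hypothetical payload that would pass those checks; the citation field names and all values below are made up for illustration:

    conversation = [
        {"role": "user", "content": "What is the capital of France?"},
        {
            "role": "assistant",
            "content": "Paris is the capital of France.",
            "context": {
                # 'citations' must be present, and each citation must be a dictionary.
                "citations": [
                    {"id": "doc_1", "content": "Paris is the capital and most populous city of France."}
                ]
            },
        },
    ]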

azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py
@@ -6,18 +6,12 @@ import json
  import logging
  import os
  import re
- from typing import Union

  import numpy as np
-
  from promptflow._utils.async_utils import async_run_allowing_running_loop
  from promptflow.core import AsyncPrompty

- from ...._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
- from ...._common.utils import (
- check_and_add_api_version_for_aoai_model_config,
- check_and_add_user_agent_for_aoai_model_config,
- )
+ from ...._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config

  logger = logging.getLogger(__name__)

@@ -34,7 +28,7 @@ class _AsyncRetrievalChatEvaluator:
  DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"

  def __init__(self, model_config: dict):
- check_and_add_api_version_for_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
+ ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)

  prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}

@@ -42,7 +36,7 @@ class _AsyncRetrievalChatEvaluator:
  # https://github.com/encode/httpx/discussions/2959
  prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})

- check_and_add_user_agent_for_aoai_model_config(
+ ensure_user_agent_in_aoai_model_config(
  model_config,
  prompty_model_config,
  USER_AGENT,
@@ -92,7 +86,7 @@ class _AsyncRetrievalChatEvaluator:

  except Exception as e: # pylint: disable=broad-exception-caught
  logger.warning(
- f"Evaluator {self.__class__.__name__} failed for turn {turn_num + 1} with exception: {e}"
+ "Evaluator %s failed for turn %s with exception: %s", self.__class__.__name__, turn_num + 1, e
  )

  per_turn_scores.append(np.nan)

azure/ai/evaluation/_evaluators/_coherence/_coherence.py
@@ -4,19 +4,14 @@

  import os
  import re
- from typing import Union

  import numpy as np
-
  from promptflow._utils.async_utils import async_run_allowing_running_loop
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
  from promptflow.core import AsyncPrompty

- from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
- from ..._common.utils import (
- check_and_add_api_version_for_aoai_model_config,
- check_and_add_user_agent_for_aoai_model_config,
- )
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+
+ from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config

  try:
  from ..._user_agent import USER_AGENT
@@ -31,7 +26,7 @@ class _AsyncCoherenceEvaluator:
  DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"

  def __init__(self, model_config: dict):
- check_and_add_api_version_for_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
+ ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)

  prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}

@@ -39,7 +34,7 @@ class _AsyncCoherenceEvaluator:
  # https://github.com/encode/httpx/discussions/2959
  prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})

- check_and_add_user_agent_for_aoai_model_config(
+ ensure_user_agent_in_aoai_model_config(
  model_config,
  prompty_model_config,
  USER_AGENT,
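
Several evaluator modules in this release switch from the check_and_add_*_for_aoai_model_config helpers to the renamed ensure_api_version_in_aoai_model_config and ensure_user_agent_in_aoai_model_config. Their real implementations live in azure/ai/evaluation/_common/utils.py and are not part of this excerpt; the sketch below is only a guess at the behavior the call sites suggest (defaulting the API version, injecting a user-agent header), and the header name is an assumption:

    from typing import Any, Dict


    def ensure_api_version_in_aoai_model_config(model_config: Dict[str, Any], default_api_version: str) -> None:
        # Assumed behavior: keep the caller's api_version if set, otherwise fall back to the default.
        model_config.setdefault("api_version", default_api_version)


    def ensure_user_agent_in_aoai_model_config(
        model_config: Dict[str, Any], prompty_model_config: Dict[str, Any], user_agent: str
    ) -> None:
        # Assumed behavior: attach the SDK user agent to the extra headers the prompty will send.
        headers = prompty_model_config.setdefault("parameters", {}).setdefault("extra_headers", {})
        headers.setdefault("x-ms-useragent", user_agent)  # header name is an assumption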

azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py
@@ -5,8 +5,6 @@ from concurrent.futures import as_completed

  from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor

- from azure.ai.evaluation._model_configurations import AzureAIProject
-
  try:
  from ._hate_unfairness import HateUnfairnessEvaluator
  from ._self_harm import SelfHarmEvaluator

azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py
@@ -6,8 +6,7 @@ from abc import ABC

  from azure.ai.evaluation._common.constants import EvaluationMetrics
  from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
- from azure.ai.evaluation._model_configurations import AzureAIProject
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException


  class ContentSafetyEvaluatorBase(ABC):

azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py
@@ -7,9 +7,8 @@ from typing import Dict, List

  import numpy as np
  from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget

- from azure.ai.evaluation._model_configurations import AzureAIProject
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException

  try:
  from ._hate_unfairness import HateUnfairnessEvaluator
@@ -165,7 +164,10 @@ class ContentSafetyChatEvaluator:
  return score
  except Exception as e: # pylint: disable=broad-exception-caught
  logger.warning(
- f"Evaluator {evaluator.__class__.__name__} failed for turn {turn_num + 1} with exception: {e}"
+ "Evaluator %s failed for turn %s with exception: %s",
+ evaluator.__class__.__name__,
+ turn_num + 1,
+ e,
  )
  return {}

@@ -235,7 +237,10 @@ class ContentSafetyChatEvaluator:
  )

  if "role" not in turn or "content" not in turn:
- msg = f"Each turn in 'conversation' must have 'role' and 'content' keys. Turn number: {one_based_turn_num}"
+ msg = (
+ "Each turn in 'conversation' must have 'role' and 'content' keys. "
+ + f"Turn number: {one_based_turn_num}"
+ )
  raise EvaluationException(
  message=msg,
  internal_message=msg,
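
As in the earlier files, the logger calls here drop eager f-strings in favor of %-style arguments, so the message is interpolated only when the record is actually emitted and the template stays constant for log aggregation. A small illustration of the difference:

    import logging

    logger = logging.getLogger("example")
    logger.setLevel(logging.ERROR)  # WARNING records are filtered out below

    turn_num, err = 2, ValueError("boom")

    # Eager: the f-string is built even though the record is discarded.
    logger.warning(f"Evaluator failed for turn {turn_num + 1} with exception: {err}")

    # Lazy: arguments are formatted only if a handler actually processes the record.
    logger.warning("Evaluator %s failed for turn %s with exception: %s", "SomeEvaluator", turn_num + 1, err)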

azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py
@@ -2,8 +2,8 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import EvaluationMetrics
- from azure.ai.evaluation._model_configurations import AzureAIProject

  try:
  from ._content_safety_base import ContentSafetyEvaluatorBase

azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py
@@ -2,8 +2,8 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import EvaluationMetrics
- from azure.ai.evaluation._model_configurations import AzureAIProject

  try:
  from ._content_safety_base import ContentSafetyEvaluatorBase

azure/ai/evaluation/_evaluators/_content_safety/_sexual.py
@@ -2,8 +2,8 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import EvaluationMetrics
- from azure.ai.evaluation._model_configurations import AzureAIProject

  try:
  from ._content_safety_base import ContentSafetyEvaluatorBase

azure/ai/evaluation/_evaluators/_content_safety/_violence.py
@@ -2,8 +2,8 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import EvaluationMetrics
- from azure.ai.evaluation._model_configurations import AzureAIProject

  try:
  from ._content_safety_base import ContentSafetyEvaluatorBase

azure/ai/evaluation/_evaluators/_eci/_eci.py
@@ -2,10 +2,10 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics
  from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
- from azure.ai.evaluation._model_configurations import AzureAIProject
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException


  class _AsyncECIEvaluator:

azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py
@@ -6,7 +6,8 @@ from collections import Counter
  from typing import List

  from promptflow._utils.async_utils import async_run_allowing_running_loop
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException


  class _AsyncF1ScoreEvaluator:

azure/ai/evaluation/_evaluators/_fluency/_fluency.py
@@ -4,19 +4,14 @@

  import os
  import re
- from typing import Union

  import numpy as np
-
  from promptflow._utils.async_utils import async_run_allowing_running_loop
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
  from promptflow.core import AsyncPrompty

- from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
- from ..._common.utils import (
- check_and_add_api_version_for_aoai_model_config,
- check_and_add_user_agent_for_aoai_model_config,
- )
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+
+ from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config

  try:
  from ..._user_agent import USER_AGENT
@@ -31,7 +26,7 @@ class _AsyncFluencyEvaluator:
  DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"

  def __init__(self, model_config: dict):
- check_and_add_api_version_for_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
+ ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)

  prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}

@@ -39,7 +34,7 @@ class _AsyncFluencyEvaluator:
  # https://github.com/encode/httpx/discussions/2959
  prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})

- check_and_add_user_agent_for_aoai_model_config(
+ ensure_user_agent_in_aoai_model_config(
  model_config,
  prompty_model_config,
  USER_AGENT,

azure/ai/evaluation/_evaluators/_gleu/_gleu.py
@@ -2,8 +2,8 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from nltk.translate.gleu_score import sentence_gleu
-
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.utils import nltk_tokenize


azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py
@@ -4,19 +4,14 @@

  import os
  import re
- from typing import Union

  import numpy as np
-
  from promptflow._utils.async_utils import async_run_allowing_running_loop
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
  from promptflow.core import AsyncPrompty

- from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
- from ..._common.utils import (
- check_and_add_api_version_for_aoai_model_config,
- check_and_add_user_agent_for_aoai_model_config,
- )
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+
+ from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config

  try:
  from ..._user_agent import USER_AGENT
@@ -31,7 +26,7 @@ class _AsyncGroundednessEvaluator:
  DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"

  def __init__(self, model_config: dict):
- check_and_add_api_version_for_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
+ ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)

  prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}

@@ -39,7 +34,7 @@ class _AsyncGroundednessEvaluator:
  # https://github.com/encode/httpx/discussions/2959
  prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})

- check_and_add_user_agent_for_aoai_model_config(
+ ensure_user_agent_in_aoai_model_config(
  model_config,
  prompty_model_config,
  USER_AGENT,

azure/ai/evaluation/_evaluators/_meteor/_meteor.py
@@ -4,6 +4,7 @@
  import nltk
  from nltk.translate.meteor_score import meteor_score
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.utils import nltk_tokenize


azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py
@@ -2,10 +2,10 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import EvaluationMetrics
  from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
- from azure.ai.evaluation._model_configurations import AzureAIProject
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException


  class _AsyncProtectedMaterialEvaluator:

azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py
@@ -2,10 +2,10 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  from promptflow._utils.async_utils import async_run_allowing_running_loop
+
  from azure.ai.evaluation._common.constants import EvaluationMetrics
  from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
- from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
- from azure.ai.evaluation._model_configurations import AzureAIProject
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException


  class _AsyncProtectedMaterialsEvaluator: