azure-ai-evaluation 1.0.0__py3-none-any.whl → 1.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of azure-ai-evaluation has been flagged as potentially problematic.

Files changed (105)
  1. azure/ai/evaluation/__init__.py +5 -31
  2. azure/ai/evaluation/_common/constants.py +2 -9
  3. azure/ai/evaluation/_common/rai_service.py +120 -300
  4. azure/ai/evaluation/_common/utils.py +23 -381
  5. azure/ai/evaluation/_constants.py +6 -19
  6. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/__init__.py +2 -3
  7. azure/ai/evaluation/_evaluate/{_batch_run/eval_run_context.py → _batch_run_client/batch_run_context.py} +7 -23
  8. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py +17 -33
  9. azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py +4 -32
  10. azure/ai/evaluation/_evaluate/_eval_run.py +24 -81
  11. azure/ai/evaluation/_evaluate/_evaluate.py +239 -393
  12. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +17 -17
  13. azure/ai/evaluation/_evaluate/_utils.py +28 -82
  14. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +18 -17
  15. azure/ai/evaluation/_evaluators/{_retrieval → _chat}/__init__.py +2 -2
  16. azure/ai/evaluation/_evaluators/_chat/_chat.py +357 -0
  17. azure/ai/evaluation/_evaluators/{_service_groundedness → _chat/retrieval}/__init__.py +2 -2
  18. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +157 -0
  19. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
  20. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +88 -78
  21. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +39 -76
  22. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +4 -0
  23. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +67 -105
  24. azure/ai/evaluation/_evaluators/{_multimodal/_content_safety_multimodal_base.py → _content_safety/_content_safety_base.py} +34 -24
  25. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +301 -0
  26. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +54 -105
  27. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +52 -99
  28. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +52 -101
  29. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +51 -101
  30. azure/ai/evaluation/_evaluators/_eci/_eci.py +54 -44
  31. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +19 -34
  32. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +89 -76
  33. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +41 -66
  34. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +16 -14
  35. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +87 -113
  36. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
  37. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +27 -20
  38. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +80 -89
  39. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
  40. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
  41. azure/ai/evaluation/_evaluators/_qa/_qa.py +30 -23
  42. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +96 -84
  43. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +47 -78
  44. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +27 -26
  45. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +38 -53
  46. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +5 -0
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +105 -91
  48. azure/ai/evaluation/_exceptions.py +7 -28
  49. azure/ai/evaluation/_http_utils.py +132 -203
  50. azure/ai/evaluation/_model_configurations.py +8 -104
  51. azure/ai/evaluation/_version.py +1 -1
  52. azure/ai/evaluation/simulator/__init__.py +1 -2
  53. azure/ai/evaluation/simulator/_adversarial_scenario.py +1 -20
  54. azure/ai/evaluation/simulator/_adversarial_simulator.py +92 -111
  55. azure/ai/evaluation/simulator/_constants.py +1 -11
  56. azure/ai/evaluation/simulator/_conversation/__init__.py +12 -13
  57. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
  58. azure/ai/evaluation/simulator/_direct_attack_simulator.py +67 -33
  59. azure/ai/evaluation/simulator/_helpers/__init__.py +2 -1
  60. azure/ai/evaluation/{_common → simulator/_helpers}/_experimental.py +9 -24
  61. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +5 -26
  62. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +94 -107
  63. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +22 -70
  64. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +11 -28
  65. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +4 -8
  66. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +24 -68
  67. azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
  68. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +10 -6
  69. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +5 -6
  70. azure/ai/evaluation/simulator/_simulator.py +207 -277
  71. azure/ai/evaluation/simulator/_tracing.py +4 -4
  72. azure/ai/evaluation/simulator/_utils.py +13 -31
  73. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +449 -0
  74. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +99 -0
  75. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +1 -1
  76. azure/ai/evaluation/_common/math.py +0 -89
  77. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -46
  78. azure/ai/evaluation/_evaluators/_common/__init__.py +0 -13
  79. azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -344
  80. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -88
  81. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -133
  82. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -113
  83. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -99
  84. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  85. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  86. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  87. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  88. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  89. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  90. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  91. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -112
  92. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -93
  93. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -148
  94. azure/ai/evaluation/_vendor/__init__.py +0 -3
  95. azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -14
  96. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -328
  97. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -63
  98. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -63
  99. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -53
  100. azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  101. azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -1150
  102. azure_ai_evaluation-1.0.0.dist-info/METADATA +0 -595
  103. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +0 -70
  104. azure_ai_evaluation-1.0.0.dist-info/RECORD +0 -119
  105. {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0
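
The rename entries above (items 6-9) show the internal batch-run package moving between the two versions: 1.0.0 ships it as azure.ai.evaluation._evaluate._batch_run, while 1.0.0b2 shipped it as azure.ai.evaluation._evaluate._batch_run_client. A minimal sketch of probing both locations for the CodeClient class that the hunks below modify (these are private modules, so importing them at all is an assumption made purely to illustrate the restructuring):

# Sketch only: resolve the private batch-run client across the rename seen in this
# diff (_batch_run_client in 1.0.0b2 -> _batch_run in 1.0.0). Private modules like
# these are not a supported API surface; this is illustrative, not recommended usage.
try:
    from azure.ai.evaluation._evaluate._batch_run.code_client import CodeClient  # 1.0.0 layout
except ImportError:
    from azure.ai.evaluation._evaluate._batch_run_client.code_client import CodeClient  # 1.0.0b2 layout

print(CodeClient)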
azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py

@@ -5,9 +5,8 @@ import inspect
 import json
 import logging
 import os
-from concurrent.futures import Future
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Union, cast
+from typing import Callable, Dict, Optional, Union

 import pandas as pd
 from promptflow.contracts.types import AttrDict
@@ -23,31 +22,25 @@ LOGGER = logging.getLogger(__name__)

 class CodeRun:
     def __init__(
-        self,
-        *,
-        run: Future,
-        input_data,
-        evaluator_name: Optional[str] = None,
-        aggregator: Callable[["CodeRun"], Future],
-        **kwargs, # pylint: disable=unused-argument
-    ) -> None:
+        self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs # pylint: disable=unused-argument
+    ):
         self.run = run
         self.evaluator_name = evaluator_name if evaluator_name is not None else ""
         self.input_data = input_data
-        self.aggregated_metrics = aggregator(self)
+        self.aggregated_metrics = aggregated_metrics

-    def get_result_df(self, exclude_inputs: bool = False) -> pd.DataFrame:
+    def get_result_df(self, exclude_inputs=False):
         batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-        result_df = cast(pd.DataFrame, self.run.result(timeout=batch_run_timeout))
+        result_df = self.run.result(timeout=batch_run_timeout)
         if exclude_inputs:
             result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")])
         return result_df

-    def get_aggregated_metrics(self) -> Dict[str, Any]:
+    def get_aggregated_metrics(self):
         try:
             batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-            aggregated_metrics: Optional[Any] = (
-                cast(Dict, self.aggregated_metrics.result(timeout=batch_run_timeout))
+            aggregated_metrics = (
+                self.aggregated_metrics.result(timeout=batch_run_timeout)
                 if self.aggregated_metrics is not None
                 else None
             )
@@ -111,10 +104,10 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
             verify_integrity=True,
         )

-    @staticmethod
-    def _calculate_aggregations(evaluator: Callable, run: CodeRun) -> Any:
+    def _calculate_aggregations(self, evaluator, run):
         try:
             if _has_aggregator(evaluator):
+                aggregate_input = None
                 evaluator_output = run.get_result_df(exclude_inputs=True)
                 if len(evaluator_output.columns) == 1 and evaluator_output.columns[0] == "output":
                     aggregate_input = evaluator_output["output"].tolist()
@@ -159,30 +152,21 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
             column_mapping=column_mapping,
             evaluator_name=evaluator_name,
         )
-
-        return CodeRun(
-            run=eval_future,
-            input_data=data,
-            evaluator_name=evaluator_name,
-            aggregator=lambda code_run: self._thread_pool.submit(
-                self._calculate_aggregations, evaluator=flow, run=code_run
-            ),
-        )
+        run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
+        aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
+        run.aggregated_metrics = aggregation_future
+        return run

     def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df

-    def get_metrics(self, run: CodeRun) -> Dict[str, Any]:
+    def get_metrics(self, run: CodeRun) -> Optional[None]:
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
         except Exception as ex: # pylint: disable=broad-exception-caught
             LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", run.evaluator_name, ex)
-            return {}
+            return None
         return aggregated_metrics
-
-    def get_run_summary(self, run: CodeRun) -> Any: # pylint: disable=unused-argument
-        # Not implemented
-        return None
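
Most of the churn in code_client.py is aggregation wiring: in 1.0.0, CodeRun.__init__ takes an aggregator callback so the aggregation future exists as soon as the run object is constructed, while 1.0.0b2 creates the run with aggregated_metrics=None and attaches the future afterwards. A minimal, self-contained sketch of the two patterns (class and function names here are illustrative, not the package's own):

from concurrent.futures import Future, ThreadPoolExecutor
from typing import Callable, Optional

def fake_aggregate(run) -> dict:
    # Illustrative stand-in for CodeClient._calculate_aggregations.
    return {"mean_score": 0.5}

class BetaStyleRun:
    # 1.0.0b2 pattern: the aggregation future is attached after construction,
    # so the attribute is briefly None.
    def __init__(self, aggregated_metrics: Optional[Future] = None) -> None:
        self.aggregated_metrics = aggregated_metrics

class StableStyleRun:
    # 1.0.0 pattern: the constructor receives a callback that schedules the
    # aggregation, so the future exists by the time __init__ returns.
    def __init__(self, *, aggregator: Callable[["StableStyleRun"], Future]) -> None:
        self.aggregated_metrics = aggregator(self)

with ThreadPoolExecutor() as pool:
    beta_run = BetaStyleRun(aggregated_metrics=None)
    beta_run.aggregated_metrics = pool.submit(fake_aggregate, beta_run)

    stable_run = StableStyleRun(aggregator=lambda r: pool.submit(fake_aggregate, r))

    print(beta_run.aggregated_metrics.result())
    print(stable_run.aggregated_metrics.result())

The callback form closes the brief window in which aggregated_metrics has not yet been assigned.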
azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py

@@ -1,17 +1,13 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-
-# pylint: disable=protected-access
-
 import inspect
 import logging
-import math
 import os
-from collections import OrderedDict
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union

+import numpy as np
 import pandas as pd
 from promptflow.client import PFClient
 from promptflow.entities import Run
@@ -40,7 +36,7 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
         **kwargs
     ) -> ProxyRun:
         flow_to_run = flow
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
+        if hasattr(flow, "_to_async"):
             flow_to_run = flow._to_async() # pylint: disable=protected-access

         batch_use_async = self._should_batch_use_async(flow_to_run)
@@ -57,40 +53,16 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
     def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
         run: Run = proxy_run.run.result()
         result_df = self._pf_client.get_details(run, all_results=all_results)
-        result_df.replace("(Failed)", math.nan, inplace=True)
+        result_df.replace("(Failed)", np.nan, inplace=True)
         return result_df

     def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         run: Run = proxy_run.run.result()
         return self._pf_client.get_metrics(run)

-    def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run = proxy_run.run.result()
-
-        # pylint: disable=protected-access
-        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
-        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
-
-        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
-        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
-            status = "Completed with Errors"
-        else:
-            status = run.status
-
-        # Return the ordered dictionary with the updated status
-        return OrderedDict(
-            [
-                ("status", status),
-                ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines", completed_lines),
-                ("failed_lines", failed_lines),
-                ("log_path", str(run._output_path)),
-            ]
-        )
-
     @staticmethod
     def _should_batch_use_async(flow):
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
+        if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
             if inspect.iscoroutinefunction(flow):
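
Besides dropping get_run_summary, the functional change in proxy_client.py is the async gate: 1.0.0 reads the AI_EVALS_BATCH_USE_ASYNC environment variable (1.0.0b2 read PF_EVALS_BATCH_USE_ASYNC) and then checks whether the evaluator, or its __call__, is a coroutine function. A standalone sketch of that detection logic, with illustrative evaluator names:

import asyncio
import inspect
import os

def should_batch_use_async(flow) -> bool:
    # Mirrors the check in the hunk above: an env-var switch (AI_EVALS_BATCH_USE_ASYNC
    # in 1.0.0, PF_EVALS_BATCH_USE_ASYNC in 1.0.0b2) guards a coroutine-function test.
    if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
        if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
            return True
        if inspect.iscoroutinefunction(flow):
            return True
    return False

class AsyncEvaluator:
    async def __call__(self, *, response: str) -> dict:
        await asyncio.sleep(0)
        return {"length": len(response)}

def sync_evaluator(*, response: str) -> dict:
    return {"length": len(response)}

print(should_batch_use_async(AsyncEvaluator()))  # True: __call__ is a coroutine function
print(should_batch_use_async(sync_evaluator))    # False: plain synchronous function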
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -10,18 +10,16 @@ import posixpath
 import time
 import types
 import uuid
-from typing import Any, Dict, List, Optional, Set, Type
+from typing import Any, Dict, Optional, Set, Type
 from urllib.parse import urlparse

 from promptflow._sdk.entities import Run
-from typing_extensions import Self

 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
-from azure.core.exceptions import HttpResponseError

 LOGGER = logging.getLogger(__name__)

@@ -29,20 +27,18 @@ LOGGER = logging.getLogger(__name__)
 # Handle optional import. The azure libraries are only present if
 # promptflow-azure is installed.
 try:
-    from azure.ai.ml import MLClient
     from azure.ai.ml.entities._credentials import AccountKeyConfiguration # pylint: disable=ungrouped-imports
     from azure.ai.ml.entities._datastore.datastore import Datastore
     from azure.storage.blob import BlobServiceClient
 except (ModuleNotFoundError, ImportError):
-    raise EvaluationException( # pylint: disable=raise-missing-from
-        message=(
-            "The required packages for remote tracking are missing.\n"
-            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
-        ),
-        target=ErrorTarget.EVALUATE,
-        category=ErrorCategory.MISSING_PACKAGE,
-        blame=ErrorBlame.USER_ERROR,
-    )
+    # If the above mentioned modules cannot be imported, we are running
+    # in local mode and MLClient in the constructor will be None, so
+    # we will not arrive to Azure-dependent code.
+
+    # We are logging the import failure only if debug logging level is set because:
+    # - If the project configuration was not provided this import is not needed.
+    # - If the project configuration was provided, the error will be raised by PFClient.
+    LOGGER.debug("promptflow.azure is not installed.")


 @dataclasses.dataclass
@@ -104,6 +100,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
     _SCOPE = "https://management.azure.com/.default"

     EVALUATION_ARTIFACT = "instance_results.jsonl"
+    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"

     def __init__(
         self,
@@ -124,8 +121,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         self._run_name = run_name
         self._promptflow_run = promptflow_run
         self._status = RunStatus.NOT_STARTED
-        self._url_base: Optional[str] = None
-        self._info: Optional[RunInfo] = None
+        self._url_base = None
+        self.info = None

     @property
     def status(self) -> RunStatus:
@@ -137,20 +134,6 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         """
         return self._status

-    @property
-    def info(self) -> RunInfo:
-        if self._info is None:
-            msg = "Run info is missing"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVAL_RUN,
-                category=ErrorCategory.UNKNOWN,
-                blame=ErrorBlame.UNKNOWN,
-            )
-
-        return self._info
-
     def _get_scope(self) -> str:
         """
         Return the scope information for the workspace.
@@ -178,11 +161,11 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
             )
             self._url_base = None
             self._status = RunStatus.BROKEN
-            self._info = RunInfo.generate(self._run_name)
+            self.info = RunInfo.generate(self._run_name)
         else:
             self._url_base = urlparse(self._tracking_uri).netloc
             if self._promptflow_run is not None:
-                self._info = RunInfo(
+                self.info = RunInfo(
                     self._promptflow_run.name,
                     self._promptflow_run._experiment_name, # pylint: disable=protected-access
                     self._promptflow_run.name,
@@ -199,7 +182,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
                     body["run_name"] = self._run_name
                 response = self.request_with_retry(url=url, method="POST", json_dict=body)
                 if response.status_code != 200:
-                    self._info = RunInfo.generate(self._run_name)
+                    self.info = RunInfo.generate(self._run_name)
                     LOGGER.warning(
                         "The run failed to start: %s: %s."
                         "The results will be saved locally, but will not be logged to Azure.",
@@ -209,7 +192,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
                     self._status = RunStatus.BROKEN
                 else:
                     parsed_response = response.json()
-                    self._info = RunInfo(
+                    self.info = RunInfo(
                         run_id=parsed_response["run"]["info"]["run_id"],
                         experiment_id=parsed_response["run"]["info"]["experiment_id"],
                         run_name=parsed_response["run"]["info"]["run_name"],
@@ -252,7 +235,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
             LOGGER.warning("Unable to terminate the run.")
         self._status = RunStatus.TERMINATED

-    def __enter__(self) -> Self:
+    def __enter__(self):
         """The Context Manager enter call.

         :return: The instance of the class.
@@ -266,7 +249,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         exc_type: Optional[Type[BaseException]],
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
-    ) -> None:
+    ) -> Optional[bool]:
         """The context manager exit call.

         :param exc_type: The exception type
@@ -413,7 +396,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         """
         if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
-        # Check if artifact directory is empty or does not exist.
+        # Check if artifact dirrectory is empty or does not exist.
         if not os.path.isdir(artifact_folder):
             LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
             return
@@ -425,7 +408,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
             return
         # First we will list the files and the appropriate remote paths for them.
         root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
-        remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
+        remote_paths = {"paths": []}
         local_paths = []
         # Go over the artifact folder and upload all artifacts.
         for root, _, filenames in os.walk(artifact_folder):
@@ -444,32 +427,15 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-        try:
-            for local, remote in zip(local_paths, remote_paths["paths"]):
-                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-                with open(local, "rb") as fp:
-                    blob_client.upload_blob(fp, overwrite=True)
-        except HttpResponseError as ex:
-            if ex.status_code == 403:
-                msg = (
-                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
-                    " Please ensure that the necessary access rights are granted."
-                )
-                raise EvaluationException(
-                    message=msg,
-                    target=ErrorTarget.EVAL_RUN,
-                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
-                    blame=ErrorBlame.USER_ERROR,
-                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
-                ) from ex
-
-            raise ex
+        for local, remote in zip(local_paths, remote_paths["paths"]):
+            blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+            with open(local, "rb") as fp:
+                blob_client.upload_blob(fp, overwrite=True)

         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.
         if self._is_promptflow_run:
             return
-
         url = (
             f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
             f"/resourceGroups/{self._resource_group_name}/providers/"
@@ -492,29 +458,6 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         if response.status_code != 200:
             self._log_warning("register artifact", response)

-        # register artifacts for images if exists in image folder
-        try:
-            for remote_path in remote_paths["paths"]:
-                remote_file_path = remote_path["path"]
-                if "images" in os.path.normpath(remote_file_path).split(os.sep):
-                    response = self.request_with_retry(
-                        url=url,
-                        method="POST",
-                        json_dict={
-                            "origin": "ExperimentRun",
-                            "container": f"dcid.{self.info.run_id}",
-                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
-                            "dataPath": {
-                                "dataStoreName": datastore.name,
-                                "relativePath": remote_file_path,
-                            },
-                        },
-                    )
-                    if response.status_code != 200:
-                        self._log_warning("register image artifact", response)
-        except Exception as ex: # pylint: disable=broad-exception-caught
-            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
-
     def _get_datastore_credential(self, datastore: "Datastore"):
         # Reference the logic in azure.ai.ml._artifact._artifact_utilities
         # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103
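
Two of the 1.0.0-only additions in _eval_run.py follow the same defensive pattern: info becomes a property that raises instead of silently holding None, and the blob upload converts a 403 HttpResponseError into a permission-specific EvaluationException. A minimal sketch of the guarded-property half, with an illustrative exception class standing in for EvaluationException:

from typing import Optional

class RunInfoMissingError(RuntimeError):
    # Illustrative stand-in for the SDK's EvaluationException.
    pass

class SketchEvalRun:
    def __init__(self) -> None:
        self._info: Optional[dict] = None  # populated once the run starts

    @property
    def info(self) -> dict:
        # 1.0.0-style accessor: fail loudly if the run has not produced its
        # metadata yet, instead of handing callers a None to trip over later.
        if self._info is None:
            raise RunInfoMissingError("Run info is missing")
        return self._info

run = SketchEvalRun()
try:
    run.info  # accessed before run info exists
except RunInfoMissingError as ex:
    print(f"caught: {ex}")

run._info = {"run_id": "example-run-id"}
print(run.info["run_id"])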