azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.0b5__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (79)
  1. azure/ai/evaluation/__init__.py +22 -0
  2. azure/ai/evaluation/_common/constants.py +5 -0
  3. azure/ai/evaluation/_common/math.py +11 -0
  4. azure/ai/evaluation/_common/rai_service.py +172 -35
  5. azure/ai/evaluation/_common/utils.py +162 -23
  6. azure/ai/evaluation/_constants.py +6 -6
  7. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
  8. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +4 -4
  9. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +6 -3
  10. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +35 -0
  11. azure/ai/evaluation/_evaluate/_eval_run.py +21 -4
  12. azure/ai/evaluation/_evaluate/_evaluate.py +267 -139
  13. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -5
  14. azure/ai/evaluation/_evaluate/_utils.py +40 -7
  15. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  16. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +14 -9
  17. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
  18. azure/ai/evaluation/_evaluators/_common/_base_eval.py +20 -19
  19. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +18 -8
  20. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +48 -9
  21. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +56 -19
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +5 -5
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +30 -1
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +30 -1
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +30 -1
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +30 -1
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +20 -20
  29. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
  30. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  31. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +49 -15
  32. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  33. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  34. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -7
  35. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  36. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +130 -0
  37. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +57 -0
  38. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +96 -0
  39. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +120 -0
  40. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +96 -0
  41. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +96 -0
  42. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +96 -0
  43. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +44 -11
  44. azure/ai/evaluation/_evaluators/_qa/_qa.py +7 -3
  45. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -19
  46. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
  47. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +125 -82
  48. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
  49. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +2 -2
  50. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  51. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +150 -0
  52. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +17 -14
  53. azure/ai/evaluation/_evaluators/_xpia/xpia.py +32 -5
  54. azure/ai/evaluation/_exceptions.py +17 -0
  55. azure/ai/evaluation/_model_configurations.py +18 -1
  56. azure/ai/evaluation/_version.py +1 -1
  57. azure/ai/evaluation/simulator/__init__.py +2 -1
  58. azure/ai/evaluation/simulator/_adversarial_scenario.py +5 -0
  59. azure/ai/evaluation/simulator/_adversarial_simulator.py +4 -1
  60. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  61. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  62. azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
  63. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  64. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
  65. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +79 -34
  66. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -1
  67. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
  68. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
  69. azure/ai/evaluation/simulator/_simulator.py +115 -61
  70. azure/ai/evaluation/simulator/_utils.py +6 -6
  71. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/METADATA +166 -9
  72. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/NOTICE.txt +20 -0
  73. azure_ai_evaluation-1.0.0b5.dist-info/RECORD +120 -0
  74. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/WHEEL +1 -1
  75. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
  76. azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
  77. /azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +0 -0
  78. /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
  79. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_common/utils.py

@@ -2,13 +2,14 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import re
 import math
 import threading
-from typing import Any, List, Literal, Mapping, Type, TypeVar, Union, cast, get_args, get_origin
+from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin
 
 import nltk
 from typing_extensions import NotRequired, Required, TypeGuard
-
+from promptflow.core._errors import MissingRequiredPackage
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import (
@@ -24,7 +25,7 @@ _nltk_data_download_lock = threading.Lock()
 T_TypedDict = TypeVar("T_TypedDict", bound=Mapping[Any, Any])
 
 
-def get_harm_severity_level(harm_score: int) -> Union[str, float]:
+def get_harm_severity_level(harm_score: Union[float, int]) -> Union[str, float]:
     """Generate harm severity level based on harm score.
 
     :param harm_score: The harm score to be evaluated.
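The widened signature lets callers pass fractional harm scores (for example, scores averaged over turns) without violating the annotation. A minimal calling sketch; the concrete return values are not shown in this hunk, so the comments only describe the annotated types:

```python
from azure.ai.evaluation._common.utils import get_harm_severity_level

# Accepted in 1.0.0b4 and 1.0.0b5 alike:
level = get_harm_severity_level(4)    # returns Union[str, float]
# Newly type-correct in 1.0.0b5:
level = get_harm_severity_level(3.5)  # fractional scores no longer mismatch the old int annotation
```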
@@ -48,13 +49,19 @@ def get_harm_severity_level(harm_score: int) -> Union[str, float]:
 
 def ensure_nltk_data_downloaded():
     """Download NLTK data packages if not already downloaded."""
+    nltk_data = [
+        ("wordnet", "corpora/wordnet.zip"),
+        ("perluniprops", "misc/perluniprops.zip"),
+        ("punkt", "tokenizers/punkt.zip"),
+        ("punkt_tab", "tokenizers/punkt_tab.zip"),
+    ]
+
     with _nltk_data_download_lock:
-        try:
-            from nltk.tokenize.nist import NISTTokenizer  # pylint: disable=unused-import
-        except LookupError:
-            nltk.download("perluniprops")
-            nltk.download("punkt")
-            nltk.download("punkt_tab")
+        for _id, resource_name in nltk_data:
+            try:
+                nltk.find(resource_name)
+            except LookupError:
+                nltk.download(_id)
 
 
 def nltk_tokenize(text: str) -> List[str]:
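The rewritten helper probes each resource and downloads only what is missing, so wordnet is now covered and repeated calls become cheap no-ops instead of unconditional downloads. A standalone sketch of the same check-then-download pattern, using the documented nltk.data.find entry point (the hunk itself calls nltk.find) and illustrative names:

```python
import threading

import nltk

_download_lock = threading.Lock()


def ensure_nltk_resource(package_id: str, resource_path: str) -> None:
    # Probe the local NLTK data index first; download only on a miss.
    with _download_lock:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package_id)


ensure_nltk_resource("punkt", "tokenizers/punkt.zip")  # no-op if already present
```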
@@ -122,24 +129,23 @@ def validate_azure_ai_project(o: object) -> AzureAIProject:
     fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}
 
     if not isinstance(o, dict):
-        msg = "azure_ai_project must be a dictionary"
+        msg = "The 'azure_ai_project' parameter must be a dictionary."
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )
 
     missing_fields = set(fields.keys()) - o.keys()
 
     if missing_fields:
-        msg = "azure_ai_project must contain keys: " + ", ".join(f'"{field}"' for field in missing_fields)
+        msg = (
+            "The 'azure_ai_project' dictionary is missing the following required "
+            f"field(s): {', '.join(f'{field}' for field in missing_fields)}."
+        )
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )
 
@@ -147,13 +153,10 @@ def validate_azure_ai_project(o: object) -> AzureAIProject:
         if isinstance(o[field_name], expected_type):
             continue
 
-        msg = f"Expected azure_ai_project field {field_name!r} to be of type {expected_type}."
-
+        msg = f"Invalid type for field '{field_name}'. Expected {expected_type}, but got {type(o[field_name])}."
         raise EvaluationException(
-            message=f"{msg}. Got {type(o[field_name])}.",
-            internal_message=msg,
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            message=msg,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )
 
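Taken together with the hunks above, all three failure modes (non-dict input, missing keys, wrongly typed fields) now raise INVALID_VALUE user errors without the unrelated DIRECT_ATTACK_SIMULATOR target. A sketch of a dictionary that passes validation, built from the fields mapping shown above (values are placeholders):

```python
azure_ai_project = {
    "subscription_id": "00000000-0000-0000-0000-000000000000",  # placeholder GUID
    "resource_group_name": "my-resource-group",                 # placeholder name
    "project_name": "my-ai-project",                            # placeholder name
}

# Dropping any key, or passing e.g. project_name=123, now raises
# EvaluationException with category=ErrorCategory.INVALID_VALUE.
```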
@@ -270,3 +273,139 @@ def _validate_typed_dict(o: object, t: Type[T_TypedDict]) -> T_TypedDict:
         validate_annotation(v, annotations[k])
 
     return cast(T_TypedDict, o)
+
+
+def parse_quality_evaluator_reason_score(llm_output: str) -> Tuple[float, str]:
+    """Parse the output of prompt-based quality evaluators that return a score and reason.
+
+    Current supported evaluators:
+        - Fluency
+        - Relevance
+        - Retrieval
+        - Groundedness
+        - Coherence
+
+    :param llm_output: The output of the prompt-based quality evaluator.
+    :type llm_output: str
+    :return: The score and reason.
+    :rtype: Tuple[float, str]
+    """
+    score = math.nan
+    reason = ""
+    if llm_output:
+        score_pattern = r"<S2>(.*?)</S2>"
+        reason_pattern = r"<S1>(.*?)</S1>"
+        score_match = re.findall(score_pattern, llm_output, re.DOTALL)
+        reason_match = re.findall(reason_pattern, llm_output, re.DOTALL)
+        if score_match:
+            score = float(score_match[0].strip())
+        if reason_match:
+            reason = reason_match[0].strip()
+
+    return score, reason
+
+
+def remove_optional_singletons(eval_class, singletons):
+    required_singletons = singletons.copy()
+    if hasattr(eval_class, "_OPTIONAL_PARAMS"):  # pylint: disable=protected-access
+        for param in eval_class._OPTIONAL_PARAMS:  # pylint: disable=protected-access
+            if param in singletons:
+                del required_singletons[param]
+    return required_singletons
+
+
+def retrieve_content_type(assistant_messages: List, metric: str) -> str:
+    """Get the content type for service payload.
+
+    :param assistant_messages: The list of messages to be annotated by evaluation service
+    :type assistant_messages: list
+    :param metric: A string representing the metric type
+    :type metric: str
+    :return: A text representing the content type. Example: 'text', or 'image'
+    :rtype: str
+    """
+    # Check if metric is "protected_material"
+    if metric == "protected_material":
+        return "image"
+
+    # Iterate through each message
+    for item in assistant_messages:
+        # Ensure "content" exists in the message and is iterable
+        content = item.get("content", [])
+        for message in content:
+            if message.get("type", "") == "image_url":
+                return "image"
+    # Default return if no image was found
+    return "text"
+
+
+def validate_conversation(conversation):
+    def raise_exception(msg, target):
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=target,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+    if not conversation or "messages" not in conversation:
+        raise_exception(
+            "Attribute 'messages' is missing in the request",
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
+        )
+    messages = conversation["messages"]
+    if not isinstance(messages, list):
+        raise_exception(
+            "'messages' parameter must be a JSON-compatible list of chat messages",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    expected_roles = {"user", "assistant", "system"}
+    image_found = False
+    for num, message in enumerate(messages, 1):
+        if not isinstance(message, dict):
+            try:
+                from azure.ai.inference.models import (
+                    ChatRequestMessage,
+                    UserMessage,
+                    AssistantMessage,
+                    SystemMessage,
+                    ImageContentItem,
+                )
+            except ImportError as ex:
+                raise MissingRequiredPackage(
+                    message="Please install 'azure-ai-inference' package to use SystemMessage, AssistantMessage"
+                ) from ex
+
+            if isinstance(messages[0], ChatRequestMessage) and not isinstance(
+                message, (UserMessage, AssistantMessage, SystemMessage)
+            ):
+                raise_exception(
+                    f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
+                    ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+                )
+
+            if isinstance(message.content, list) and any(
+                isinstance(item, ImageContentItem) for item in message.content
+            ):
+                image_found = True
+            continue
+        if message.get("role") not in expected_roles:
+            raise_exception(
+                f"Invalid role provided: {message.get('role')}. Message number: {num}",
+                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            )
+        content = message.get("content")
+        if not isinstance(content, (str, list)):
+            raise_exception(
+                f"Content in each turn must be a string or array. Message number: {num}",
+                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            )
+        if isinstance(content, list):
+            if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
+                image_found = True
+    if not image_found:
+        raise_exception(
+            "Message needs to have multi-modal input like images.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
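Of the new helpers, parse_quality_evaluator_reason_score defines the contract the reworked prompty files rely on: the reasoning goes inside <S1> tags, the score inside <S2> tags, and anything unparsable degrades to (nan, "") rather than raising. A small usage sketch with hand-written output:

```python
import math

from azure.ai.evaluation._common.utils import parse_quality_evaluator_reason_score

llm_output = "<S1>The response follows logically from the context.</S1><S2>5</S2>"
score, reason = parse_quality_evaluator_reason_score(llm_output)
assert score == 5.0
assert reason == "The response follows logically from the context."

# Output without the expected tags degrades to the defaults instead of raising.
score, reason = parse_quality_evaluator_reason_score("no tags here")
assert math.isnan(score) and reason == ""
```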
azure/ai/evaluation/_constants.py

@@ -7,13 +7,13 @@ from typing import Literal
 class EvaluationMetrics:
     """Metrics for model evaluation."""
 
-    GPT_GROUNDEDNESS = "gpt_groundedness"
-    GPT_RELEVANCE = "gpt_relevance"
-    GPT_COHERENCE = "gpt_coherence"
-    GPT_FLUENCY = "gpt_fluency"
-    GPT_SIMILARITY = "gpt_similarity"
+    GROUNDEDNESS = "groundedness"
+    RELEVANCE = "relevance"
+    COHERENCE = "coherence"
+    FLUENCY = "fluency"
+    SIMILARITY = "similarity"
     F1_SCORE = "f1_score"
-    GPT_RETRIEVAL_SCORE = "gpt_retrieval_score"
+    RETRIEVAL_SCORE = "retrieval_score"
     HATE_FAIRNESS = "hate_fairness"
     HATE_UNFAIRNESS = "hate_unfairness"
     VIOLENCE = "violence"
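Dropping the gpt_ prefix changes the metric names that downstream code keys results on. A hedged migration sketch for consumers that persisted results under the 1.0.0b4 names; the mapping is inferred from this hunk, not from a documented API:

```python
# Old (1.0.0b4) metric key -> new (1.0.0b5) metric key, per this hunk.
RENAMED_METRICS = {
    "gpt_groundedness": "groundedness",
    "gpt_relevance": "relevance",
    "gpt_coherence": "coherence",
    "gpt_fluency": "fluency",
    "gpt_similarity": "similarity",
    "gpt_retrieval_score": "retrieval_score",
}


def migrate_metric_keys(results: dict) -> dict:
    """Rewrite stored evaluation results to the new metric names."""
    return {RENAMED_METRICS.get(key, key): value for key, value in results.items()}
```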
azure/ai/evaluation/_evaluate/_batch_run/__init__.py

@@ -1,8 +1,9 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from .batch_run_context import BatchRunContext
+from .eval_run_context import EvalRunContext
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
+from .target_run_context import TargetRunContext
 
-__all__ = ["CodeClient", "ProxyClient", "BatchRunContext"]
+__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py (renamed from _batch_run_client/batch_run_context.py)

@@ -22,13 +22,13 @@ from .code_client import CodeClient
 from .proxy_client import ProxyClient
 
 
-class BatchRunContext:
-    """Context manager for batch run clients.
+class EvalRunContext:
+    """Context manager for eval batch run.
 
     :param client: The client to run in the context.
     :type client: Union[
-        ~azure.ai.evaluation._evaluate._batch_run_client.code_client.CodeClient,
-        ~azure.ai.evaluation._evaluate._batch_run_client.proxy_client.ProxyClient
+        ~azure.ai.evaluation._evaluate._batch_run.code_client.CodeClient,
+        ~azure.ai.evaluation._evaluate._batch_run.proxy_client.ProxyClient
     ]
     """
 
azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py (renamed from _batch_run_client/proxy_client.py)

@@ -1,13 +1,16 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
 import inspect
 import logging
 import math
 import os
+from collections import OrderedDict
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union
-from collections import OrderedDict
 
 import pandas as pd
 from promptflow.client import PFClient
@@ -37,7 +40,7 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         **kwargs
     ) -> ProxyRun:
         flow_to_run = flow
-        if hasattr(flow, "_to_async"):
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access
 
         batch_use_async = self._should_batch_use_async(flow_to_run)
@@ -77,7 +80,7 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
 
     @staticmethod
     def _should_batch_use_async(flow):
-        if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
             if inspect.iscoroutinefunction(flow):
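Both the submission path (the run hunk above) and _should_batch_use_async now read the same AI_EVALS_BATCH_USE_ASYNC variable, which replaces PF_EVALS_BATCH_USE_ASYNC and still defaults to "true". A sketch of opting out of the async batch path, assuming the variable is set before the evaluation starts:

```python
import os

# Any value other than "true" (case-insensitive) disables the
# flow._to_async() conversion and the async batch path in 1.0.0b5.
os.environ["AI_EVALS_BATCH_USE_ASYNC"] = "false"

# Note: scripts still exporting the old PF_EVALS_BATCH_USE_ASYNC
# variable silently lose their effect after this rename.
```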
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py (new file)

@@ -0,0 +1,35 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+import types
+from typing import Optional, Type
+
+from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+
+
+class TargetRunContext:
+    """Context manager for target batch run.
+
+    :param upload_snapshot: Whether to upload target snapshot.
+    :type upload_snapshot: bool
+    """
+
+    def __init__(self, upload_snapshot: bool) -> None:
+        self._upload_snapshot = upload_snapshot
+
+    def __enter__(self) -> None:
+        # Address "[WinError 32] The process cannot access the file" error,
+        # caused by conflicts when the venv and target function are in the same directory.
+        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+        if not self._upload_snapshot:
+            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        if not self._upload_snapshot:
+            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
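TargetRunContext scopes promptflow's PF_FLOW_ENTRY_IN_TMP flag to a single target run. A usage sketch; the target callable is hypothetical, and in the SDK the context is entered around the target batch run rather than user code:

```python
from azure.ai.evaluation._evaluate._batch_run import TargetRunContext


def my_target(query: str) -> dict:  # hypothetical target function
    return {"response": query.upper()}


with TargetRunContext(upload_snapshot=False):
    # PF_FLOW_ENTRY_IN_TMP is set to "true" here, so promptflow stages only
    # the flex entry file (flow.flex.yaml) instead of snapshotting the whole
    # working directory, avoiding the WinError 32 conflict noted above.
    result = my_target("hello")
# __exit__ removes the variable again via os.environ.pop.
```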
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -21,6 +21,7 @@ from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError
 
 LOGGER = logging.getLogger(__name__)
 
@@ -443,10 +444,26 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-in
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-        for local, remote in zip(local_paths, remote_paths["paths"]):
-            blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-            with open(local, "rb") as fp:
-                blob_client.upload_blob(fp, overwrite=True)
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex
 
         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.
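With this change a storage 403 no longer surfaces as a raw HttpResponseError: it becomes an EvaluationException carrying a FAILED_REMOTE_TRACKING category and a troubleshooting link, while other statuses are re-raised unchanged. A hedged sketch of what a caller sees; the log_artifact call site is illustrative rather than taken from this hunk:

```python
from azure.ai.evaluation._exceptions import EvaluationException

try:
    run.log_artifact("./evaluation_output")  # illustrative call into EvalRun
except EvaluationException as ex:
    # On a storage 403 in 1.0.0b5: a permissions message plus a tsg_link to
    # https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot
    print(str(ex))
```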