azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. azure/ai/evaluation/__init__.py +82 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/_experimental.py +172 -0
  4. azure/ai/evaluation/_common/constants.py +72 -0
  5. azure/ai/evaluation/_common/math.py +89 -0
  6. azure/ai/evaluation/_common/rai_service.py +632 -0
  7. azure/ai/evaluation/_common/utils.py +445 -0
  8. azure/ai/evaluation/_constants.py +72 -0
  9. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
  12. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
  13. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
  14. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  15. azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
  16. azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
  17. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
  18. azure/ai/evaluation/_evaluate/_utils.py +298 -0
  19. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  20. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  21. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
  22. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  23. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
  24. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
  25. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  26. azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
  27. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
  28. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  47. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  48. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  49. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
  50. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  51. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  52. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  53. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  54. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  55. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  56. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  57. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  58. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  59. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
  60. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
  62. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
  64. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
  65. azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
  66. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
  67. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  68. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  69. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  70. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  72. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  73. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
  74. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
  75. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  76. azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
  77. azure/ai/evaluation/_exceptions.py +128 -0
  78. azure/ai/evaluation/_http_utils.py +466 -0
  79. azure/ai/evaluation/_model_configurations.py +123 -0
  80. azure/ai/evaluation/_user_agent.py +6 -0
  81. azure/ai/evaluation/_vendor/__init__.py +3 -0
  82. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  83. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  84. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  85. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  86. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  87. azure/ai/evaluation/_version.py +5 -0
  88. azure/ai/evaluation/py.typed +0 -0
  89. azure/ai/evaluation/simulator/__init__.py +16 -0
  90. azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
  91. azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
  92. azure/ai/evaluation/simulator/_constants.py +27 -0
  93. azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
  94. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  95. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  96. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  97. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  98. azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
  99. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  100. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  101. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
  102. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
  103. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  104. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
  105. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
  106. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
  107. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
  108. azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
  109. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  110. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
  111. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
  112. azure/ai/evaluation/simulator/_simulator.py +716 -0
  113. azure/ai/evaluation/simulator/_tracing.py +89 -0
  114. azure/ai/evaluation/simulator/_utils.py +132 -0
  115. azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
  116. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
  117. azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
  118. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
  119. azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
  120. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  121. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  122. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
azure/ai/evaluation/_evaluate/_eval_run.py
@@ -0,0 +1,571 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import contextlib
+import dataclasses
+import enum
+import logging
+import os
+import posixpath
+import time
+import types
+import uuid
+from typing import Any, Dict, List, Optional, Set, Type
+from urllib.parse import urlparse
+
+from promptflow._sdk.entities import Run
+from typing_extensions import Self
+
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+from azure.ai.evaluation._http_utils import get_http_client
+from azure.ai.evaluation._version import VERSION
+from azure.core.pipeline.policies import RetryPolicy
+from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError
+
+LOGGER = logging.getLogger(__name__)
+
+
+# Handle optional import. The azure libraries are only present if
+# promptflow-azure is installed.
+try:
+    from azure.ai.ml import MLClient
+    from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
+    from azure.ai.ml.entities._datastore.datastore import Datastore
+    from azure.storage.blob import BlobServiceClient
+except (ModuleNotFoundError, ImportError):
+    raise EvaluationException(  # pylint: disable=raise-missing-from
+        message=(
+            "The required packages for remote tracking are missing.\n"
+            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
+        ),
+        target=ErrorTarget.EVALUATE,
+        category=ErrorCategory.MISSING_PACKAGE,
+        blame=ErrorBlame.USER_ERROR,
+    )
+
+
+@dataclasses.dataclass
+class RunInfo:
+    """
+    A holder for run info, needed for logging.
+    """
+
+    run_id: str
+    experiment_id: str
+    run_name: str
+
+    @staticmethod
+    def generate(run_name: Optional[str]) -> "RunInfo":
+        """
+        Generate a new RunInfo instance with a randomly generated run ID and experiment ID.
+
+        **Note:** This code is used when we are in a failed state and cannot get a run.
+
+        :param run_name: The name of a run.
+        :type run_name: Optional[str]
+        :return: The RunInfo instance.
+        :rtype: azure.ai.evaluation._evaluate._eval_run.RunInfo
+        """
+        return RunInfo(str(uuid.uuid4()), str(uuid.uuid4()), run_name or "")
+
+
+class RunStatus(enum.Enum):
+    """Run states."""
+
+    NOT_STARTED = 0
+    STARTED = 1
+    BROKEN = 2
+    TERMINATED = 3
+
+
+class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
+    """
+    The simple singleton run class, used for accessing the artifact store.
+
+    :param run_name: The name of the run.
+    :type run_name: Optional[str]
+    :param tracking_uri: Tracking URI for this run; required to make calls.
+    :type tracking_uri: str
+    :param subscription_id: The subscription ID used to track the run.
+    :type subscription_id: str
+    :param group_name: The resource group used to track the run.
+    :type group_name: str
+    :param workspace_name: The name of the workspace/project used to track the run.
+    :type workspace_name: str
+    :param ml_client: The ml client used for authentication into Azure.
+    :type ml_client: azure.ai.ml.MLClient
+    :param promptflow_run: The promptflow run used by the evaluation.
+    """
+
+    _MAX_RETRIES = 5
+    _BACKOFF_FACTOR = 2
+    _TIMEOUT = 5
+    _SCOPE = "https://management.azure.com/.default"
+
+    EVALUATION_ARTIFACT = "instance_results.jsonl"
+
+    def __init__(
+        self,
+        run_name: Optional[str],
+        tracking_uri: str,
+        subscription_id: str,
+        group_name: str,
+        workspace_name: str,
+        ml_client: "MLClient",
+        promptflow_run: Optional[Run] = None,
+    ) -> None:
+        self._tracking_uri: str = tracking_uri
+        self._subscription_id: str = subscription_id
+        self._resource_group_name: str = group_name
+        self._workspace_name: str = workspace_name
+        self._ml_client: Any = ml_client
+        self._is_promptflow_run: bool = promptflow_run is not None
+        self._run_name = run_name
+        self._promptflow_run = promptflow_run
+        self._status = RunStatus.NOT_STARTED
+        self._url_base: Optional[str] = None
+        self._info: Optional[RunInfo] = None
+
+    @property
+    def status(self) -> RunStatus:
+        """
+        Return the run status.
+
+        :return: The status of the run.
+        :rtype: azure.ai.evaluation._evaluate._eval_run.RunStatus
+        """
+        return self._status
+
+    @property
+    def info(self) -> RunInfo:
+        if self._info is None:
+            msg = "Run info is missing"
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.UNKNOWN,
+                blame=ErrorBlame.UNKNOWN,
+            )
+
+        return self._info
+
+    def _get_scope(self) -> str:
+        """
+        Return the scope information for the workspace.

+        :return: The scope information for the workspace.
+        :rtype: str
+        """
+        return (
+            "/subscriptions/{}/resourceGroups/{}/providers/Microsoft.MachineLearningServices/workspaces/{}"
+        ).format(
+            self._subscription_id,
+            self._resource_group_name,
+            self._workspace_name,
+        )
+
+    def _start_run(self) -> None:
+        """
+        Start the run, or, if it is not applicable (for example, if tracking is not enabled), mark it as started.
+        """
+        self._check_state_and_log("start run", {v for v in RunStatus if v != RunStatus.NOT_STARTED}, True)
+        self._status = RunStatus.STARTED
+        if self._tracking_uri is None:
+            LOGGER.warning(
+                "A tracking_uri was not provided. The results will be saved locally, but will not be logged to Azure."
+            )
+            self._url_base = None
+            self._status = RunStatus.BROKEN
+            self._info = RunInfo.generate(self._run_name)
+        else:
+            self._url_base = urlparse(self._tracking_uri).netloc
+            if self._promptflow_run is not None:
+                self._info = RunInfo(
+                    self._promptflow_run.name,
+                    self._promptflow_run._experiment_name,  # pylint: disable=protected-access
+                    self._promptflow_run.name,
+                )
+            else:
+                url = f"https://{self._url_base}/mlflow/v2.0{self._get_scope()}/api/2.0/mlflow/runs/create"
+                body = {
+                    "experiment_id": "0",
+                    "user_id": "azure-ai-evaluation",
+                    "start_time": int(time.time() * 1000),
+                    "tags": [{"key": "mlflow.user", "value": "azure-ai-evaluation"}],
+                }
+                if self._run_name:
+                    body["run_name"] = self._run_name
+                response = self.request_with_retry(url=url, method="POST", json_dict=body)
+                if response.status_code != 200:
+                    self._info = RunInfo.generate(self._run_name)
+                    LOGGER.warning(
+                        "The run failed to start: %s: %s. "
+                        "The results will be saved locally, but will not be logged to Azure.",
+                        response.status_code,
+                        response.text(),
+                    )
+                    self._status = RunStatus.BROKEN
+                else:
+                    parsed_response = response.json()
+                    self._info = RunInfo(
+                        run_id=parsed_response["run"]["info"]["run_id"],
+                        experiment_id=parsed_response["run"]["info"]["experiment_id"],
+                        run_name=parsed_response["run"]["info"]["run_name"],
+                    )
+                    self._status = RunStatus.STARTED
+
+    def _end_run(self, reason: str) -> None:
+        """
+        Terminate the run.
+
+        :param reason: Reason for run termination. Possible values are "FINISHED", "FAILED", and "KILLED".
+        :type reason: str
+        :raises EvaluationException: Raised if the reason is not one of "FINISHED", "FAILED", or "KILLED".
+        """
+        if not self._check_state_and_log(
+            "stop run", {RunStatus.BROKEN, RunStatus.NOT_STARTED, RunStatus.TERMINATED}, False
+        ):
+            return
+        if self._is_promptflow_run:
+            # This run is already finished; we just add artifacts/metrics to it.
+            self._status = RunStatus.TERMINATED
+            return
+        if reason not in ("FINISHED", "FAILED", "KILLED"):
+            raise EvaluationException(
+                message=f"Incorrect terminal status {reason}. Valid statuses are 'FINISHED', 'FAILED' and 'KILLED'.",
+                internal_message="Incorrect terminal status. Valid statuses are 'FINISHED', 'FAILED' and 'KILLED'",
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.FAILED_EXECUTION,
+                blame=ErrorBlame.UNKNOWN,
+            )
+        url = f"https://{self._url_base}/mlflow/v2.0{self._get_scope()}/api/2.0/mlflow/runs/update"
+        body = {
+            "run_uuid": self.info.run_id,
+            "status": reason,
+            "end_time": int(time.time() * 1000),
+            "run_id": self.info.run_id,
+        }
+        response = self.request_with_retry(url=url, method="POST", json_dict=body)
+        if response.status_code != 200:
+            LOGGER.warning("Unable to terminate the run.")
+        self._status = RunStatus.TERMINATED
+
+    def __enter__(self) -> Self:
+        """The Context Manager enter call.
+
+        :return: The instance of the class.
+        :rtype: azure.ai.evaluation._evaluate._eval_run.EvalRun
+        """
+        self._start_run()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        """The context manager exit call.
+
+        :param exc_type: The exception type
+        :type exc_type: Optional[Type[BaseException]]
+        :param exc_value: The exception value
+        :type exc_value: Optional[BaseException]
+        :param exc_tb: The exception traceback
+        :type exc_tb: Optional[types.TracebackType]
+        """
+        self._end_run("FINISHED")
+
+    def get_run_history_uri(self) -> str:
+        """
+        Get the run history service URI.
+
+        :return: The run history service URI.
+        :rtype: str
+        """
+        return (
+            f"https://{self._url_base}"
+            "/history/v1.0"
+            f"{self._get_scope()}"
+            f"/experimentids/{self.info.experiment_id}/runs/{self.info.run_id}"
+        )
+
+    def get_artifacts_uri(self) -> str:
+        """
+        Get the URI to upload the artifacts to.
+
+        :return: The URI to upload the artifacts to.
+        :rtype: str
+        """
+        return self.get_run_history_uri() + "/artifacts/batch/metadata"
+
+    def get_metrics_url(self) -> str:
+        """
+        Return the URL needed to track the mlflow metrics.
+
+        :return: The URL needed to track the mlflow metrics.
+        :rtype: str
+        """
+        return f"https://{self._url_base}/mlflow/v2.0{self._get_scope()}/api/2.0/mlflow/runs/log-metric"
+
+    def _get_token(self):
+        # We have to use a lazy import because promptflow.azure
+        # is an optional dependency.
+        from promptflow.azure._utils._token_cache import ArmTokenCache  # pylint: disable=import-error,no-name-in-module
+
+        return ArmTokenCache().get_token(self._ml_client._credential)  # pylint: disable=protected-access
+
+    def request_with_retry(
+        self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None
+    ) -> HttpResponse:
+        """
+        Send the request with retries.
+
+        :param url: The URL to send the request to.
+        :type url: str
+        :param method: The request method to be used.
+        :type method: str
+        :param json_dict: The JSON dictionary (not serialized) to be sent.
+        :type json_dict: Dict[str, Any]
+        :param headers: The headers to be sent with the request.
+        :type headers: Optional[Dict[str, str]]
+        :return: The response.
+        :rtype: HttpResponse
+        """
+        if headers is None:
+            headers = {}
+        headers["User-Agent"] = f"promptflow/{VERSION}"
+        headers["Authorization"] = f"Bearer {self._get_token()}"
+
+        session = get_http_client().with_policies(
+            retry_policy=RetryPolicy(
+                retry_total=EvalRun._MAX_RETRIES,
+                retry_connect=EvalRun._MAX_RETRIES,
+                retry_read=EvalRun._MAX_RETRIES,
+                retry_status=EvalRun._MAX_RETRIES,
+                retry_on_status_codes=(408, 429, 500, 502, 503, 504),
+                retry_backoff_factor=EvalRun._BACKOFF_FACTOR,
+            )
+        )
+        return session.request(method, url, headers=headers, json=json_dict, timeout=EvalRun._TIMEOUT)
+
+    def _log_warning(self, failed_op: str, response: HttpResponse) -> None:
+        """
+        Log an error if the request was not successful.
+
+        :param failed_op: The user-friendly message for the failed operation.
+        :type failed_op: str
+        :param response: The response.
+        :type response: HttpResponse
+        """
+        LOGGER.warning(
+            "Unable to %s, the request failed with status code %s, response.text()=%s.",
+            failed_op,
+            response.status_code,
+            response.text(),
+        )
+
+    def _check_state_and_log(self, action: str, bad_states: Set[RunStatus], should_raise: bool) -> bool:
+        """
+        Check that the run is in the correct state and log a warning if it is not.
+
+        :param action: The action which caused this check. For example, if it is "log artifact",
+            the log message will start with "Unable to log artifact."
+        :type action: str
+        :param bad_states: The states considered invalid for the given action.
+        :type bad_states: Set[RunStatus]
+        :param should_raise: Whether we should raise an error if a bad state has been encountered.
+        :type should_raise: bool
+        :raises: ~azure.ai.evaluation._exceptions.EvaluationException if should_raise is True
+            and an invalid state was encountered.
+        :return: Whether or not the run is in the correct state.
+        :rtype: bool
+        """
+        if self._status in bad_states:
+            msg = f"Unable to {action} due to Run status={self._status}."
+            if should_raise:
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_EXECUTION,
+                    blame=ErrorBlame.UNKNOWN,
+                )
+            LOGGER.warning(msg)
+            return False
+        return True
+
+    def log_artifact(self, artifact_folder: str, artifact_name: str = EVALUATION_ARTIFACT) -> None:
+        """
+        The local implementation of mlflow-like artifact logging.
+
+        **Note:** In the current implementation we are not using the thread pool executor
+        as is done in azureml-mlflow; instead we run the uploads in a loop, as we are not
+        expecting to upload a large number of artifacts.
+
+        :param artifact_folder: The folder with artifacts to be uploaded.
+        :type artifact_folder: str
+        :param artifact_name: The name of the artifact to be uploaded. Defaults to
+            azure.ai.evaluation._evaluate._eval_run.EvalRun.EVALUATION_ARTIFACT.
+        :type artifact_name: str
+        """
+        if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
+            return
+        # Check if the artifact directory is empty or does not exist.
+        if not os.path.isdir(artifact_folder):
+            LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
+            return
+        if not os.listdir(artifact_folder):
+            LOGGER.warning("The path to the artifact is empty.")
+            return
+        if not os.path.isfile(os.path.join(artifact_folder, artifact_name)):
+            LOGGER.warning("The run results file was not found, skipping artifacts upload.")
+            return
+        # First we will list the files and the appropriate remote paths for them.
+        root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
+        remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
+        local_paths = []
+        # Go over the artifact folder and upload all artifacts.
+        for root, _, filenames in os.walk(artifact_folder):
+            upload_path = root_upload_path
+            if root != artifact_folder:
+                rel_path = os.path.relpath(root, artifact_folder)
+                if rel_path != ".":
+                    upload_path = posixpath.join(root_upload_path, rel_path)
+            for f in filenames:
+                remote_file_path = posixpath.join(upload_path, f)
+                remote_paths["paths"].append({"path": remote_file_path})
+                local_file_path = os.path.join(root, f)
+                local_paths.append(local_file_path)
+
+        # We will write the artifacts to the workspaceblobstore.
+        datastore = self._ml_client.datastores.get_default(include_secrets=True)
+        account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
+        svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex
+
+        # To show the artifact in the UI we will need to register it. If it is a promptflow run,
+        # we are rewriting an already registered artifact and need to skip this step.
+        if self._is_promptflow_run:
+            return
+
+        url = (
+            f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
+            f"/resourceGroups/{self._resource_group_name}/providers/"
+            f"Microsoft.MachineLearningServices/workspaces/{self._workspace_name}/artifacts/register"
+        )
+
+        response = self.request_with_retry(
+            url=url,
+            method="POST",
+            json_dict={
+                "origin": "ExperimentRun",
+                "container": f"dcid.{self.info.run_id}",
+                "path": artifact_name,
+                "dataPath": {
+                    "dataStoreName": datastore.name,
+                    "relativePath": posixpath.join(root_upload_path, artifact_name),
+                },
+            },
+        )
+        if response.status_code != 200:
+            self._log_warning("register artifact", response)
+
+        # Register artifacts for images, if any exist in the images folder.
+        try:
+            for remote_path in remote_paths["paths"]:
+                remote_file_path = remote_path["path"]
+                if "images" in os.path.normpath(remote_file_path).split(os.sep):
+                    response = self.request_with_retry(
+                        url=url,
+                        method="POST",
+                        json_dict={
+                            "origin": "ExperimentRun",
+                            "container": f"dcid.{self.info.run_id}",
+                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
+                            "dataPath": {
+                                "dataStoreName": datastore.name,
+                                "relativePath": remote_file_path,
+                            },
+                        },
+                    )
+                    if response.status_code != 200:
+                        self._log_warning("register image artifact", response)
+        except Exception as ex:  # pylint: disable=broad-exception-caught
+            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
+
+    def _get_datastore_credential(self, datastore: "Datastore"):
+        # Reference the logic in azure.ai.ml._artifact._artifact_utilities
+        # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103
+        credential = datastore.credentials
+        if isinstance(credential, AccountKeyConfiguration):
+            return credential.account_key
+        if hasattr(credential, "sas_token"):
+            return credential.sas_token
+        return self._ml_client.datastores._credential  # pylint: disable=protected-access
+
+    def log_metric(self, key: str, value: float) -> None:
+        """
+        Log the metric to Azure, similar to how it is done by mlflow.
+
+        :param key: The metric name to be logged.
+        :type key: str
+        :param value: The value to be logged.
+        :type value: float
+        """
+        if not self._check_state_and_log("log metric", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
+            return
+        body = {
+            "run_uuid": self.info.run_id,
+            "key": key,
+            "value": value,
+            "timestamp": int(time.time() * 1000),
+            "step": 0,
+            "run_id": self.info.run_id,
+        }
+        response = self.request_with_retry(
+            url=self.get_metrics_url(),
+            method="POST",
+            json_dict=body,
+        )
+        if response.status_code != 200:
+            self._log_warning("save metrics", response)
+
+    def write_properties_to_run_history(self, properties: Dict[str, Any]) -> None:
+        """
+        Write properties to the RunHistory service.
+
+        :param properties: The properties to be written to run history.
+        :type properties: dict
+        """
+        if not self._check_state_and_log("write properties", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
+            return
+        # Update the properties in run history via the PATCH API.
+        response = self.request_with_retry(
+            url=self.get_run_history_uri(),
+            method="PATCH",
+            json_dict={"runId": self.info.run_id, "properties": properties},
+        )
+        if response.status_code != 200:
+            LOGGER.error("Failed to write properties '%s' to run history: %s", properties, response.text())