azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +82 -0
- azure/ai/evaluation/_common/__init__.py +16 -0
- azure/ai/evaluation/_common/_experimental.py +172 -0
- azure/ai/evaluation/_common/constants.py +72 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/rai_service.py +632 -0
- azure/ai/evaluation/_common/utils.py +445 -0
- azure/ai/evaluation/_constants.py +72 -0
- azure/ai/evaluation/_evaluate/__init__.py +3 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
- azure/ai/evaluation/_evaluate/_utils.py +298 -0
- azure/ai/evaluation/_evaluators/__init__.py +3 -0
- azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
- azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
- azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
- azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
- azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
- azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
- azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
- azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
- azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
- azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
- azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
- azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
- azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
- azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
- azure/ai/evaluation/_exceptions.py +128 -0
- azure/ai/evaluation/_http_utils.py +466 -0
- azure/ai/evaluation/_model_configurations.py +123 -0
- azure/ai/evaluation/_user_agent.py +6 -0
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +5 -0
- azure/ai/evaluation/py.typed +0 -0
- azure/ai/evaluation/simulator/__init__.py +16 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
- azure/ai/evaluation/simulator/_constants.py +27 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
- azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
- azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
- azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
- azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
- azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
- azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
- azure/ai/evaluation/simulator/_simulator.py +716 -0
- azure/ai/evaluation/simulator/_tracing.py +89 -0
- azure/ai/evaluation/simulator/_utils.py +132 -0
- azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
- azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
- azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
- {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
- azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
- azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
- azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
azure/ai/evaluation/_evaluate/_eval_run.py
@@ -0,0 +1,571 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import contextlib
+import dataclasses
+import enum
+import logging
+import os
+import posixpath
+import time
+import types
+import uuid
+from typing import Any, Dict, List, Optional, Set, Type
+from urllib.parse import urlparse
+
+from promptflow._sdk.entities import Run
+from typing_extensions import Self
+
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+from azure.ai.evaluation._http_utils import get_http_client
+from azure.ai.evaluation._version import VERSION
+from azure.core.pipeline.policies import RetryPolicy
+from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError
+
+LOGGER = logging.getLogger(__name__)
+
+
+# Handle optional import. The azure libraries are only present if
+# promptflow-azure is installed.
+try:
+    from azure.ai.ml import MLClient
+    from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
+    from azure.ai.ml.entities._datastore.datastore import Datastore
+    from azure.storage.blob import BlobServiceClient
+except (ModuleNotFoundError, ImportError):
+    raise EvaluationException(  # pylint: disable=raise-missing-from
+        message=(
+            "The required packages for remote tracking are missing.\n"
+            'To resolve this, please install them by running "pip install azure-ai-evaluation[remote]".'
+        ),
+        target=ErrorTarget.EVALUATE,
+        category=ErrorCategory.MISSING_PACKAGE,
+        blame=ErrorBlame.USER_ERROR,
+    )
+
+
+@dataclasses.dataclass
+class RunInfo:
+    """
+    A holder for run info, needed for logging.
+    """
+
+    run_id: str
+    experiment_id: str
+    run_name: str
+
+    @staticmethod
+    def generate(run_name: Optional[str]) -> "RunInfo":
+        """
+        Generate a new RunInfo instance with a run ID and experiment ID.
+
+        **Note:** This is used when the run is in a failed state and a real run cannot be obtained.
+
+        :param run_name: The name of a run.
+        :type run_name: Optional[str]
+        :return: The RunInfo instance.
+        :rtype: azure.ai.evaluation._evaluate._eval_run.RunInfo
+        """
+        return RunInfo(str(uuid.uuid4()), str(uuid.uuid4()), run_name or "")
+
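For illustration, a minimal sketch of how RunInfo.generate behaves; the IDs it produces are fresh random UUIDs, and the run name falls back to an empty string:

    # A sketch of RunInfo.generate behavior; the IDs are random uuid4 strings.
    info = RunInfo.generate("my-eval-run")
    print(info.run_id, info.experiment_id)  # two independently generated uuid4 values
    print(RunInfo.generate(None).run_name)  # "" when no name is given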
+
+class RunStatus(enum.Enum):
+    """Run states."""
+
+    NOT_STARTED = 0
+    STARTED = 1
+    BROKEN = 2
+    TERMINATED = 3
+
+
+class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
+    """
+    The simple singleton run class, used for accessing the artifact store.
+
+    :param run_name: The name of the run.
+    :type run_name: Optional[str]
+    :param tracking_uri: Tracking URI for this run; required to make calls.
+    :type tracking_uri: str
+    :param subscription_id: The subscription ID used to track the run.
+    :type subscription_id: str
+    :param group_name: The resource group used to track the run.
+    :type group_name: str
+    :param workspace_name: The name of the workspace/project used to track the run.
+    :type workspace_name: str
+    :param ml_client: The ML client used for authentication into Azure.
+    :type ml_client: azure.ai.ml.MLClient
+    :param promptflow_run: The promptflow run used by the evaluation, if any.
+    :type promptflow_run: Optional[promptflow._sdk.entities.Run]
+    """
+
+    _MAX_RETRIES = 5
+    _BACKOFF_FACTOR = 2
+    _TIMEOUT = 5
+    _SCOPE = "https://management.azure.com/.default"
+
+    EVALUATION_ARTIFACT = "instance_results.jsonl"
+
+    def __init__(
+        self,
+        run_name: Optional[str],
+        tracking_uri: str,
+        subscription_id: str,
+        group_name: str,
+        workspace_name: str,
+        ml_client: "MLClient",
+        promptflow_run: Optional[Run] = None,
+    ) -> None:
+        self._tracking_uri: str = tracking_uri
+        self._subscription_id: str = subscription_id
+        self._resource_group_name: str = group_name
+        self._workspace_name: str = workspace_name
+        self._ml_client: Any = ml_client
+        self._is_promptflow_run: bool = promptflow_run is not None
+        self._run_name = run_name
+        self._promptflow_run = promptflow_run
+        self._status = RunStatus.NOT_STARTED
+        self._url_base: Optional[str] = None
+        self._info: Optional[RunInfo] = None
+
+    @property
+    def status(self) -> RunStatus:
+        """
+        Return the run status.
+
+        :return: The status of the run.
+        :rtype: azure.ai.evaluation._evaluate._eval_run.RunStatus
+        """
+        return self._status
+
+    @property
+    def info(self) -> RunInfo:
+        if self._info is None:
+            msg = "Run info is missing"
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.UNKNOWN,
+                blame=ErrorBlame.UNKNOWN,
+            )
+
+        return self._info
+
+    def _get_scope(self) -> str:
+        """
+        Return the scope information for the workspace.
+
+        :return: The scope information for the workspace.
+        :rtype: str
+        """
+        return (
+            "/subscriptions/{}/resourceGroups/{}/providers/Microsoft.MachineLearningServices/workspaces/{}"
+        ).format(
+            self._subscription_id,
+            self._resource_group_name,
+            self._workspace_name,
+        )
+
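For illustration, the ARM-style scope string _get_scope produces; the subscription, group, and workspace values below are placeholders, not taken from this diff:

    # Placeholder identifiers; shows only the shape of the returned scope string.
    scope = "/subscriptions/{}/resourceGroups/{}/providers/Microsoft.MachineLearningServices/workspaces/{}".format(
        "00000000-0000-0000-0000-000000000000", "my-resource-group", "my-workspace"
    )
    # -> "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-resource-group"
    #    "/providers/Microsoft.MachineLearningServices/workspaces/my-workspace"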
+    def _start_run(self) -> None:
+        """
+        Start the run, or, if it is not applicable (for example, if tracking is not enabled), mark it as started.
+        """
+        self._check_state_and_log("start run", {v for v in RunStatus if v != RunStatus.NOT_STARTED}, True)
+        self._status = RunStatus.STARTED
+        if self._tracking_uri is None:
+            LOGGER.warning(
+                "A tracking_uri was not provided. The results will be saved locally, but will not be logged to Azure."
+            )
+            self._url_base = None
+            self._status = RunStatus.BROKEN
+            self._info = RunInfo.generate(self._run_name)
+        else:
+            self._url_base = urlparse(self._tracking_uri).netloc
+            if self._promptflow_run is not None:
+                self._info = RunInfo(
+                    self._promptflow_run.name,
+                    self._promptflow_run._experiment_name,  # pylint: disable=protected-access
+                    self._promptflow_run.name,
+                )
+            else:
+                url = f"https://{self._url_base}/mlflow/v2.0{self._get_scope()}/api/2.0/mlflow/runs/create"
+                body = {
+                    "experiment_id": "0",
+                    "user_id": "azure-ai-evaluation",
+                    "start_time": int(time.time() * 1000),
+                    "tags": [{"key": "mlflow.user", "value": "azure-ai-evaluation"}],
+                }
+                if self._run_name:
+                    body["run_name"] = self._run_name
+                response = self.request_with_retry(url=url, method="POST", json_dict=body)
+                if response.status_code != 200:
+                    self._info = RunInfo.generate(self._run_name)
+                    LOGGER.warning(
+                        "The run failed to start: %s: %s. "
+                        "The results will be saved locally, but will not be logged to Azure.",
+                        response.status_code,
+                        response.text(),
+                    )
+                    self._status = RunStatus.BROKEN
+                else:
+                    parsed_response = response.json()
+                    self._info = RunInfo(
+                        run_id=parsed_response["run"]["info"]["run_id"],
+                        experiment_id=parsed_response["run"]["info"]["experiment_id"],
+                        run_name=parsed_response["run"]["info"]["run_name"],
+                    )
+                    self._status = RunStatus.STARTED
+
+    def _end_run(self, reason: str) -> None:
+        """
+        Terminate the run.
+
+        :param reason: Reason for run termination. Possible values are "FINISHED", "FAILED", and "KILLED".
+        :type reason: str
+        :raises EvaluationException: Raised if reason is not one of "FINISHED", "FAILED", or "KILLED".
+        """
+        if not self._check_state_and_log(
+            "stop run", {RunStatus.BROKEN, RunStatus.NOT_STARTED, RunStatus.TERMINATED}, False
+        ):
+            return
+        if self._is_promptflow_run:
+            # This run is already finished; we just add artifacts/metrics to it.
+            self._status = RunStatus.TERMINATED
+            return
+        if reason not in ("FINISHED", "FAILED", "KILLED"):
+            raise EvaluationException(
+                message=f"Incorrect terminal status {reason}. Valid statuses are 'FINISHED', 'FAILED' and 'KILLED'.",
+                internal_message="Incorrect terminal status. Valid statuses are 'FINISHED', 'FAILED' and 'KILLED'.",
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.FAILED_EXECUTION,
+                blame=ErrorBlame.UNKNOWN,
+            )
+        url = f"https://{self._url_base}/mlflow/v2.0{self._get_scope()}/api/2.0/mlflow/runs/update"
+        body = {
+            "run_uuid": self.info.run_id,
+            "status": reason,
+            "end_time": int(time.time() * 1000),
+            "run_id": self.info.run_id,
+        }
+        response = self.request_with_retry(url=url, method="POST", json_dict=body)
+        if response.status_code != 200:
+            LOGGER.warning("Unable to terminate the run.")
+        self._status = RunStatus.TERMINATED
+
+    def __enter__(self) -> Self:
+        """The context manager enter call.
+
+        :return: The instance of the class.
+        :rtype: azure.ai.evaluation._evaluate._eval_run.EvalRun
+        """
+        self._start_run()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        """The context manager exit call.
+
+        :param exc_type: The exception type
+        :type exc_type: Optional[Type[BaseException]]
+        :param exc_value: The exception value
+        :type exc_value: Optional[BaseException]
+        :param exc_tb: The exception traceback
+        :type exc_tb: Optional[types.TracebackType]
+        """
+        self._end_run("FINISHED")
+
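Because EvalRun implements the context manager protocol, entering a with block calls _start_run and a clean exit calls _end_run("FINISHED"). A usage sketch, with placeholder workspace values and a credential not taken from this diff:

    # A sketch only; the tracking URI, IDs, and credential below are placeholders.
    from azure.ai.ml import MLClient
    from azure.identity import DefaultAzureCredential

    ml_client = MLClient(
        DefaultAzureCredential(),
        subscription_id="<subscription-id>",
        resource_group_name="<resource-group>",
        workspace_name="<workspace>",
    )
    with EvalRun(
        run_name="my-eval-run",
        tracking_uri="<mlflow-tracking-uri>",
        subscription_id="<subscription-id>",
        group_name="<resource-group>",
        workspace_name="<workspace>",
        ml_client=ml_client,
    ) as run:  # __enter__ -> _start_run()
        run.log_metric("f1_score", 0.87)
    # __exit__ -> _end_run("FINISHED")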
+    def get_run_history_uri(self) -> str:
+        """
+        Get the run history service URI.
+
+        :return: The run history service URI.
+        :rtype: str
+        """
+        return (
+            f"https://{self._url_base}"
+            "/history/v1.0"
+            f"{self._get_scope()}"
+            f"/experimentids/{self.info.experiment_id}/runs/{self.info.run_id}"
+        )
+
+    def get_artifacts_uri(self) -> str:
+        """
+        Get the URI to upload the artifacts to.
+
+        :return: The URI to upload the artifacts to.
+        :rtype: str
+        """
+        return self.get_run_history_uri() + "/artifacts/batch/metadata"
+
+    def get_metrics_url(self):
+        """
+        Return the URL used to log MLflow metrics.
+
+        :return: The URL used to log MLflow metrics.
+        :rtype: str
+        """
+        return f"https://{self._url_base}/mlflow/v2.0{self._get_scope()}/api/2.0/mlflow/runs/log-metric"
+
+    def _get_token(self):
+        # We have to use lazy import because promptflow.azure
+        # is an optional dependency.
+        from promptflow.azure._utils._token_cache import ArmTokenCache  # pylint: disable=import-error,no-name-in-module
+
+        return ArmTokenCache().get_token(self._ml_client._credential)  # pylint: disable=protected-access
+
+    def request_with_retry(
+        self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None
+    ) -> HttpResponse:
+        """
+        Send the request with retries.
+
+        :param url: The URL to send the request to.
+        :type url: str
+        :param method: The request method to be used.
+        :type method: str
+        :param json_dict: The JSON dictionary (not serialized) to be sent.
+        :type json_dict: Dict[str, Any]
+        :param headers: The headers to be sent with the request.
+        :type headers: Optional[Dict[str, str]]
+        :return: The response.
+        :rtype: HttpResponse
+        """
+        if headers is None:
+            headers = {}
+        headers["User-Agent"] = f"promptflow/{VERSION}"
+        headers["Authorization"] = f"Bearer {self._get_token()}"
+
+        session = get_http_client().with_policies(
+            retry_policy=RetryPolicy(
+                retry_total=EvalRun._MAX_RETRIES,
+                retry_connect=EvalRun._MAX_RETRIES,
+                retry_read=EvalRun._MAX_RETRIES,
+                retry_status=EvalRun._MAX_RETRIES,
+                retry_on_status_codes=(408, 429, 500, 502, 503, 504),
+                retry_backoff_factor=EvalRun._BACKOFF_FACTOR,
+            )
+        )
+        return session.request(method, url, headers=headers, json=json_dict, timeout=EvalRun._TIMEOUT)
+
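request_with_retry is the single HTTP path used by the logging helpers below; log_metric, for example, POSTs the MLflow log-metric body through it. A sketch of an equivalent direct call on a started run (the metric name and value are illustrative):

    # Mirrors the body that log_metric builds further down; "accuracy"/0.9 are example values.
    response = run.request_with_retry(
        url=run.get_metrics_url(),
        method="POST",
        json_dict={
            "run_uuid": run.info.run_id,
            "key": "accuracy",
            "value": 0.9,
            "timestamp": int(time.time() * 1000),
            "step": 0,
            "run_id": run.info.run_id,
        },
    )
    if response.status_code != 200:
        print("request failed:", response.text())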
+    def _log_warning(self, failed_op: str, response: HttpResponse) -> None:
+        """
+        Log a warning if the request was not successful.
+
+        :param failed_op: The user-friendly message for the failed operation.
+        :type failed_op: str
+        :param response: The response received from the service.
+        :type response: HttpResponse
+        """
+        LOGGER.warning(
+            "Unable to %s, the request failed with status code %s, response.text()=%s.",
+            failed_op,
+            response.status_code,
+            response.text(),
+        )
+
+    def _check_state_and_log(self, action: str, bad_states: Set[RunStatus], should_raise: bool) -> bool:
+        """
+        Check that the run is in the correct state and log a warning if it is not.
+
+        :param action: The action that triggered this check. For example, if it is "log artifact",
+            the log message will start with "Unable to log artifact."
+        :type action: str
+        :param bad_states: The states considered invalid for the given action.
+        :type bad_states: Set[RunStatus]
+        :param should_raise: Whether to raise an error if a bad state is encountered.
+        :type should_raise: bool
+        :raises: ~azure.ai.evaluation._exceptions.EvaluationException if should_raise is True
+            and an invalid state was encountered.
+        :return: Whether the run is in a valid state.
+        :rtype: bool
+        """
+        if self._status in bad_states:
+            msg = f"Unable to {action} due to Run status={self._status}."
+            if should_raise:
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_EXECUTION,
+                    blame=ErrorBlame.UNKNOWN,
+                )
+            LOGGER.warning(msg)
+            return False
+        return True
+
+    def log_artifact(self, artifact_folder: str, artifact_name: str = EVALUATION_ARTIFACT) -> None:
+        """
+        The local implementation of mlflow-like artifact logging.
+
+        **Note:** In the current implementation we are not using a thread pool executor
+        as azureml-mlflow does; instead we upload the files in a loop, since we do not
+        expect to upload a large number of artifacts.
+
+        :param artifact_folder: The folder with artifacts to be uploaded.
+        :type artifact_folder: str
+        :param artifact_name: The name of the artifact to be uploaded. Defaults to
+            azure.ai.evaluation._evaluate._eval_run.EvalRun.EVALUATION_ARTIFACT.
+        :type artifact_name: str
+        """
+        if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
+            return
+        # Check if the artifact directory is empty or does not exist.
+        if not os.path.isdir(artifact_folder):
+            LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
+            return
+        if not os.listdir(artifact_folder):
+            LOGGER.warning("The path to the artifact is empty.")
+            return
+        if not os.path.isfile(os.path.join(artifact_folder, artifact_name)):
+            LOGGER.warning("The run results file was not found, skipping artifacts upload.")
+            return
+        # First we will list the files and the appropriate remote paths for them.
+        root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
+        remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
+        local_paths = []
+        # Go over the artifact folder and upload all artifacts.
+        for root, _, filenames in os.walk(artifact_folder):
+            upload_path = root_upload_path
+            if root != artifact_folder:
+                rel_path = os.path.relpath(root, artifact_folder)
+                if rel_path != ".":
+                    upload_path = posixpath.join(root_upload_path, rel_path)
+            for f in filenames:
+                remote_file_path = posixpath.join(upload_path, f)
+                remote_paths["paths"].append({"path": remote_file_path})
+                local_file_path = os.path.join(root, f)
+                local_paths.append(local_file_path)
+
+        # We will write the artifacts to the workspaceblobstore.
+        datastore = self._ml_client.datastores.get_default(include_secrets=True)
+        account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
+        svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex
+
+        # To show the artifact in the UI we will need to register it. If it is a promptflow run,
+        # we are rewriting an already registered artifact and need to skip this step.
+        if self._is_promptflow_run:
+            return
+
+        url = (
+            f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
+            f"/resourceGroups/{self._resource_group_name}/providers/"
+            f"Microsoft.MachineLearningServices/workspaces/{self._workspace_name}/artifacts/register"
+        )
+
+        response = self.request_with_retry(
+            url=url,
+            method="POST",
+            json_dict={
+                "origin": "ExperimentRun",
+                "container": f"dcid.{self.info.run_id}",
+                "path": artifact_name,
+                "dataPath": {
+                    "dataStoreName": datastore.name,
+                    "relativePath": posixpath.join(root_upload_path, artifact_name),
+                },
+            },
+        )
+        if response.status_code != 200:
+            self._log_warning("register artifact", response)
+
+        # Register artifacts for any images found in an "images" folder.
+        try:
+            for remote_path in remote_paths["paths"]:
+                remote_file_path = remote_path["path"]
+                if "images" in os.path.normpath(remote_file_path).split(os.sep):
+                    response = self.request_with_retry(
+                        url=url,
+                        method="POST",
+                        json_dict={
+                            "origin": "ExperimentRun",
+                            "container": f"dcid.{self.info.run_id}",
+                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
+                            "dataPath": {
+                                "dataStoreName": datastore.name,
+                                "relativePath": remote_file_path,
+                            },
+                        },
+                    )
+                    if response.status_code != 200:
+                        self._log_warning("register image artifact", response)
+        except Exception as ex:  # pylint: disable=broad-exception-caught
+            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
+
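A usage sketch for log_artifact; the folder name below is a placeholder, and the folder must contain instance_results.jsonl (the EVALUATION_ARTIFACT default), otherwise the upload is skipped with a warning:

    # "./eval_output" is a hypothetical folder produced by an evaluation run.
    run.log_artifact("./eval_output")
    # A differently named results file can be passed explicitly:
    run.log_artifact("./eval_output", artifact_name="instance_results.jsonl")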
+    def _get_datastore_credential(self, datastore: "Datastore"):
+        # Reference the logic in azure.ai.ml._artifact._artifact_utilities
+        # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103
+        credential = datastore.credentials
+        if isinstance(credential, AccountKeyConfiguration):
+            return credential.account_key
+        if hasattr(credential, "sas_token"):
+            return credential.sas_token
+        return self._ml_client.datastores._credential  # pylint: disable=protected-access
+
+    def log_metric(self, key: str, value: float) -> None:
+        """
+        Log the metric to Azure similar to how it is done by MLflow.
+
+        :param key: The metric name to be logged.
+        :type key: str
+        :param value: The value to be logged.
+        :type value: float
+        """
+        if not self._check_state_and_log("log metric", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
+            return
+        body = {
+            "run_uuid": self.info.run_id,
+            "key": key,
+            "value": value,
+            "timestamp": int(time.time() * 1000),
+            "step": 0,
+            "run_id": self.info.run_id,
+        }
+        response = self.request_with_retry(
+            url=self.get_metrics_url(),
+            method="POST",
+            json_dict=body,
+        )
+        if response.status_code != 200:
+            self._log_warning("save metrics", response)
+
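A usage sketch for log_metric; the metric names and values are illustrative. Note that a failed request is only logged as a warning via _log_warning, not raised:

    run.log_metric("gpt_coherence", 4.25)  # posts one MLflow metric at step 0
    run.log_metric("gpt_relevance", 3.80)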
+    def write_properties_to_run_history(self, properties: Dict[str, Any]) -> None:
+        """
+        Write properties to the RunHistory service.
+
+        :param properties: The properties to be written to run history.
+        :type properties: dict
+        """
+        if not self._check_state_and_log("write properties", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
+            return
+        # Update the host to run history and request the PATCH API.
+        response = self.request_with_retry(
+            url=self.get_run_history_uri(),
+            method="PATCH",
+            json_dict={"runId": self.info.run_id, "properties": properties},
+        )
+        if response.status_code != 200:
+            LOGGER.error("Failed writing properties '%s' to run history: %s", properties, response.text())
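Finally, a usage sketch for write_properties_to_run_history; the property key below is illustrative rather than a documented schema, and failures are logged rather than raised:

    run.write_properties_to_run_history({"example_property": "azure-ai-evaluation"})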