azure-ai-evaluation 1.0.0__py3-none-any.whl → 1.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
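In the hunks below, `-` lines come from 1.0.0 and `+` lines from 1.0.0b2, following standard unified-diff conventions. If you want to reproduce a listing like this yourself, here is a minimal sketch using only the Python standard library; the wheel filenames are assumptions, and both wheels must first be fetched (for example with `pip download azure-ai-evaluation==1.0.0 --no-deps`):

# Minimal sketch: print a unified diff of the text files inside two wheels.
# Wheels are ordinary zip archives; the filenames below are illustrative.
import difflib
import zipfile

def wheel_texts(path):
    """Return {member name: decoded text} for text-like members of a wheel."""
    with zipfile.ZipFile(path) as zf:
        return {
            name: zf.read(name).decode("utf-8", errors="replace")
            for name in zf.namelist()
            if name.endswith((".py", ".txt", ".prompty", ".jsonl"))
        }

old = wheel_texts("azure_ai_evaluation-1.0.0-py3-none-any.whl")
new = wheel_texts("azure_ai_evaluation-1.0.0b2-py3-none-any.whl")
for name in sorted(old.keys() | new.keys()):
    diff = difflib.unified_diff(
        old.get(name, "").splitlines(),
        new.get(name, "").splitlines(),
        fromfile=f"1.0.0/{name}",
        tofile=f"1.0.0b2/{name}",
        lineterm="",
    )
    for line in diff:
        print(line)

Because wheels are plain zip archives, no packaging-specific tooling is needed to unpack and compare them.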
- azure/ai/evaluation/__init__.py +5 -31
- azure/ai/evaluation/_common/constants.py +2 -9
- azure/ai/evaluation/_common/rai_service.py +120 -300
- azure/ai/evaluation/_common/utils.py +23 -381
- azure/ai/evaluation/_constants.py +6 -19
- azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/__init__.py +2 -3
- azure/ai/evaluation/_evaluate/{_batch_run/eval_run_context.py → _batch_run_client/batch_run_context.py} +7 -23
- azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py +17 -33
- azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py +4 -32
- azure/ai/evaluation/_evaluate/_eval_run.py +24 -81
- azure/ai/evaluation/_evaluate/_evaluate.py +239 -393
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +17 -17
- azure/ai/evaluation/_evaluate/_utils.py +28 -82
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +18 -17
- azure/ai/evaluation/_evaluators/{_retrieval → _chat}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_chat/_chat.py +357 -0
- azure/ai/evaluation/_evaluators/{_service_groundedness → _chat/retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +157 -0
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +48 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +88 -78
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +39 -76
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +4 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +67 -105
- azure/ai/evaluation/_evaluators/{_multimodal/_content_safety_multimodal_base.py → _content_safety/_content_safety_base.py} +34 -24
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +301 -0
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +54 -105
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +52 -99
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +52 -101
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +51 -101
- azure/ai/evaluation/_evaluators/_eci/_eci.py +54 -44
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +19 -34
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +89 -76
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +41 -66
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +16 -14
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +87 -113
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +54 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +27 -20
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +80 -89
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +104 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +30 -23
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +96 -84
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +47 -78
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +27 -26
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +38 -53
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +5 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +105 -91
- azure/ai/evaluation/_exceptions.py +7 -28
- azure/ai/evaluation/_http_utils.py +132 -203
- azure/ai/evaluation/_model_configurations.py +8 -104
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +1 -2
- azure/ai/evaluation/simulator/_adversarial_scenario.py +1 -20
- azure/ai/evaluation/simulator/_adversarial_simulator.py +92 -111
- azure/ai/evaluation/simulator/_constants.py +1 -11
- azure/ai/evaluation/simulator/_conversation/__init__.py +12 -13
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +67 -33
- azure/ai/evaluation/simulator/_helpers/__init__.py +2 -1
- azure/ai/evaluation/{_common → simulator/_helpers}/_experimental.py +9 -24
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +5 -26
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +94 -107
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +22 -70
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +11 -28
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +4 -8
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +24 -68
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +10 -6
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +5 -6
- azure/ai/evaluation/simulator/_simulator.py +207 -277
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- azure/ai/evaluation/simulator/_utils.py +13 -31
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +449 -0
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +99 -0
- {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_common/math.py +0 -89
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -46
- azure/ai/evaluation/_evaluators/_common/__init__.py +0 -13
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -344
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -88
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -133
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -113
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -99
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -112
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -93
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -148
- azure/ai/evaluation/_vendor/__init__.py +0 -3
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -14
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -328
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -63
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -63
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -53
- azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
- azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -1150
- azure_ai_evaluation-1.0.0.dist-info/METADATA +0 -595
- azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +0 -70
- azure_ai_evaluation-1.0.0.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.0.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0
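Several modules present in 1.0.0 (for example `azure/ai/evaluation/_common/math.py` and the vendored `rouge_score` package) do not exist in 1.0.0b2 at all, so code written against one version can fail on the other. A quick way to confirm which release an environment actually contains, using only the standard library:

from importlib.metadata import PackageNotFoundError, version

try:
    print(version("azure-ai-evaluation"))  # e.g. "1.0.0" or "1.0.0b2"
except PackageNotFoundError:
    print("azure-ai-evaluation is not installed")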
azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/code_client.py

@@ -5,9 +5,8 @@ import inspect
 import json
 import logging
 import os
-from concurrent.futures import Future
 from pathlib import Path
-from typing import
+from typing import Callable, Dict, Optional, Union

 import pandas as pd
 from promptflow.contracts.types import AttrDict
@@ -23,31 +22,25 @@ LOGGER = logging.getLogger(__name__)

 class CodeRun:
     def __init__(
-        self,
-        *,
-        run: Future,
-        input_data,
-        evaluator_name: Optional[str] = None,
-        aggregator: Callable[["CodeRun"], Future],
-        **kwargs,  # pylint: disable=unused-argument
-    ) -> None:
+        self, run, input_data, evaluator_name=None, aggregated_metrics=None, **kwargs  # pylint: disable=unused-argument
+    ):
         self.run = run
         self.evaluator_name = evaluator_name if evaluator_name is not None else ""
         self.input_data = input_data
-        self.aggregated_metrics = aggregator(self)
+        self.aggregated_metrics = aggregated_metrics

-    def get_result_df(self, exclude_inputs: bool = False) -> pd.DataFrame:
+    def get_result_df(self, exclude_inputs=False):
         batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-        result_df =
+        result_df = self.run.result(timeout=batch_run_timeout)
         if exclude_inputs:
             result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")])
         return result_df

-    def get_aggregated_metrics(self) -> Dict[str, Any]:
+    def get_aggregated_metrics(self):
         try:
             batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT)
-            aggregated_metrics
-
+            aggregated_metrics = (
+                self.aggregated_metrics.result(timeout=batch_run_timeout)
                 if self.aggregated_metrics is not None
                 else None
             )
@@ -111,10 +104,10 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             verify_integrity=True,
         )

-    @staticmethod
-    def _calculate_aggregations(evaluator: Callable, run: CodeRun) -> Any:
+    def _calculate_aggregations(self, evaluator, run):
         try:
             if _has_aggregator(evaluator):
+                aggregate_input = None
                 evaluator_output = run.get_result_df(exclude_inputs=True)
                 if len(evaluator_output.columns) == 1 and evaluator_output.columns[0] == "output":
                     aggregate_input = evaluator_output["output"].tolist()
@@ -159,30 +152,21 @@ class CodeClient:  # pylint: disable=client-accepts-api-version-keyword
             column_mapping=column_mapping,
             evaluator_name=evaluator_name,
         )
-
-        return CodeRun(
-            run=eval_future,
-            input_data=data,
-            evaluator_name=evaluator_name,
-            aggregator=lambda code_run: self._thread_pool.submit(
-                self._calculate_aggregations, evaluator=flow, run=code_run
-            ),
-        )
+        run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
+        aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
+        run.aggregated_metrics = aggregation_future
+        return run

     def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df

-    def get_metrics(self, run: CodeRun) ->
+    def get_metrics(self, run: CodeRun) -> Optional[None]:
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
         except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.debug("Error calculating metrics for evaluator %s, failed with error %s", run.evaluator_name, ex)
-            return
+            return None
         return aggregated_metrics
-
-    def get_run_summary(self, run: CodeRun) -> Any:  # pylint: disable=unused-argument
-        # Not implemented
-        return None
azure/ai/evaluation/_evaluate/{_batch_run → _batch_run_client}/proxy_client.py

@@ -1,17 +1,13 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-
-# pylint: disable=protected-access
-
 import inspect
 import logging
-import math
 import os
-from collections import OrderedDict
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union

+import numpy as np
 import pandas as pd
 from promptflow.client import PFClient
 from promptflow.entities import Run
@@ -40,7 +36,7 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
         **kwargs
     ) -> ProxyRun:
         flow_to_run = flow
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
+        if hasattr(flow, "_to_async"):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access

         batch_use_async = self._should_batch_use_async(flow_to_run)
@@ -57,40 +53,16 @@ class ProxyClient:  # pylint: disable=client-accepts-api-version-keyword
     def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
         run: Run = proxy_run.run.result()
         result_df = self._pf_client.get_details(run, all_results=all_results)
-        result_df.replace("(Failed)", math.nan, inplace=True)
+        result_df.replace("(Failed)", np.nan, inplace=True)
         return result_df

     def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         run: Run = proxy_run.run.result()
         return self._pf_client.get_metrics(run)

-    def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
-        run = proxy_run.run.result()
-
-        # pylint: disable=protected-access
-        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
-        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
-
-        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
-        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
-            status = "Completed with Errors"
-        else:
-            status = run.status
-
-        # Return the ordered dictionary with the updated status
-        return OrderedDict(
-            [
-                ("status", status),
-                ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines", completed_lines),
-                ("failed_lines", failed_lines),
-                ("log_path", str(run._output_path)),
-            ]
-        )
-
     @staticmethod
     def _should_batch_use_async(flow):
-        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
+        if os.getenv("PF_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
             if inspect.iscoroutinefunction(flow):
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -10,18 +10,16 @@ import posixpath
 import time
 import types
 import uuid
-from typing import Any, Dict,
+from typing import Any, Dict, Optional, Set, Type
 from urllib.parse import urlparse

 from promptflow._sdk.entities import Run
-from typing_extensions import Self

 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
-from azure.core.exceptions import HttpResponseError

 LOGGER = logging.getLogger(__name__)
@@ -29,20 +27,18 @@ LOGGER = logging.getLogger(__name__)
 # Handle optional import. The azure libraries are only present if
 # promptflow-azure is installed.
 try:
-    from azure.ai.ml import MLClient
     from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
     from azure.ai.ml.entities._datastore.datastore import Datastore
     from azure.storage.blob import BlobServiceClient
 except (ModuleNotFoundError, ImportError):
-
-
-
-
-
-
-
-
-    )
+    # If the above mentioned modules cannot be imported, we are running
+    # in local mode and MLClient in the constructor will be None, so
+    # we will not arrive to Azure-dependent code.
+
+    # We are logging the import failure only if debug logging level is set because:
+    # - If the project configuration was not provided this import is not needed.
+    # - If the project configuration was provided, the error will be raised by PFClient.
+    LOGGER.debug("promptflow.azure is not installed.")


 @dataclasses.dataclass
@@ -104,6 +100,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
     _SCOPE = "https://management.azure.com/.default"

     EVALUATION_ARTIFACT = "instance_results.jsonl"
+    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"

     def __init__(
         self,
@@ -124,8 +121,8 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         self._run_name = run_name
         self._promptflow_run = promptflow_run
         self._status = RunStatus.NOT_STARTED
-        self._url_base: Optional[str] = None
-        self._info: Optional[RunInfo] = None
+        self._url_base = None
+        self.info = None

     @property
     def status(self) -> RunStatus:
@@ -137,20 +134,6 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         """
         return self._status

-    @property
-    def info(self) -> RunInfo:
-        if self._info is None:
-            msg = "Run info is missing"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.EVAL_RUN,
-                category=ErrorCategory.UNKNOWN,
-                blame=ErrorBlame.UNKNOWN,
-            )
-
-        return self._info
-
     def _get_scope(self) -> str:
         """
         Return the scope information for the workspace.
@@ -178,11 +161,11 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             )
             self._url_base = None
             self._status = RunStatus.BROKEN
-            self._info = RunInfo.generate(self._run_name)
+            self.info = RunInfo.generate(self._run_name)
         else:
             self._url_base = urlparse(self._tracking_uri).netloc
             if self._promptflow_run is not None:
-                self._info = RunInfo(
+                self.info = RunInfo(
                     self._promptflow_run.name,
                     self._promptflow_run._experiment_name,  # pylint: disable=protected-access
                     self._promptflow_run.name,
@@ -199,7 +182,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             body["run_name"] = self._run_name
         response = self.request_with_retry(url=url, method="POST", json_dict=body)
         if response.status_code != 200:
-            self._info = RunInfo.generate(self._run_name)
+            self.info = RunInfo.generate(self._run_name)
             LOGGER.warning(
                 "The run failed to start: %s: %s."
                 "The results will be saved locally, but will not be logged to Azure.",
@@ -209,7 +192,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             self._status = RunStatus.BROKEN
         else:
             parsed_response = response.json()
-            self._info = RunInfo(
+            self.info = RunInfo(
                 run_id=parsed_response["run"]["info"]["run_id"],
                 experiment_id=parsed_response["run"]["info"]["experiment_id"],
                 run_name=parsed_response["run"]["info"]["run_name"],
@@ -252,7 +235,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             LOGGER.warning("Unable to terminate the run.")
         self._status = RunStatus.TERMINATED

-    def __enter__(self) -> Self:
+    def __enter__(self):
         """The Context Manager enter call.

         :return: The instance of the class.
@@ -266,7 +249,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         exc_type: Optional[Type[BaseException]],
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
-    ) ->
+    ) -> Optional[bool]:
         """The context manager exit call.

         :param exc_type: The exception type
@@ -413,7 +396,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         """
         if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
-        # Check if artifact directory is empty or does not exist.
+        # Check if artifact dirrectory is empty or does not exist.
         if not os.path.isdir(artifact_folder):
             LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
             return
@@ -425,7 +408,7 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
             return
         # First we will list the files and the appropriate remote paths for them.
         root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
-        remote_paths
+        remote_paths = {"paths": []}
         local_paths = []
         # Go over the artifact folder and upload all artifacts.
         for root, _, filenames in os.walk(artifact_folder):
@@ -444,32 +427,15 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-        try:
-            for local, remote in zip(local_paths, remote_paths["paths"]):
-                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-                with open(local, "rb") as fp:
-                    blob_client.upload_blob(fp, overwrite=True)
-        except HttpResponseError as ex:
-            if ex.status_code == 403:
-                msg = (
-                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
-                    " Please ensure that the necessary access rights are granted."
-                )
-                raise EvaluationException(
-                    message=msg,
-                    target=ErrorTarget.EVAL_RUN,
-                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
-                    blame=ErrorBlame.USER_ERROR,
-                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
-                ) from ex
-
-            raise ex
+        for local, remote in zip(local_paths, remote_paths["paths"]):
+            blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+            with open(local, "rb") as fp:
+                blob_client.upload_blob(fp, overwrite=True)

         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.
         if self._is_promptflow_run:
             return
-
         url = (
             f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
             f"/resourceGroups/{self._resource_group_name}/providers/"
@@ -492,29 +458,6 @@ class EvalRun(contextlib.AbstractContextManager):  # pylint: disable=too-many-instance-attributes
         if response.status_code != 200:
             self._log_warning("register artifact", response)

-        # register artifacts for images if exists in image folder
-        try:
-            for remote_path in remote_paths["paths"]:
-                remote_file_path = remote_path["path"]
-                if "images" in os.path.normpath(remote_file_path).split(os.sep):
-                    response = self.request_with_retry(
-                        url=url,
-                        method="POST",
-                        json_dict={
-                            "origin": "ExperimentRun",
-                            "container": f"dcid.{self.info.run_id}",
-                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
-                            "dataPath": {
-                                "dataStoreName": datastore.name,
-                                "relativePath": remote_file_path,
-                            },
-                        },
-                    )
-                    if response.status_code != 200:
-                        self._log_warning("register image artifact", response)
-        except Exception as ex:  # pylint: disable=broad-exception-caught
-            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)
-
     def _get_datastore_credential(self, datastore: "Datastore"):
         # Reference the logic in azure.ai.ml._artifact._artifact_utilities
         # https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_artifact_utilities.py#L103