azure-ai-evaluation 1.0.0b3__py3-none-any.whl → 1.0.0b5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +23 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +20 -9
- azure/ai/evaluation/_common/constants.py +9 -2
- azure/ai/evaluation/_common/math.py +29 -0
- azure/ai/evaluation/_common/rai_service.py +222 -93
- azure/ai/evaluation/_common/utils.py +328 -19
- azure/ai/evaluation/_constants.py +16 -8
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +33 -17
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +14 -7
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +22 -4
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +35 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +47 -14
- azure/ai/evaluation/_evaluate/_evaluate.py +370 -188
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +15 -16
- azure/ai/evaluation/_evaluate/_utils.py +77 -25
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +16 -10
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +76 -46
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +26 -19
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +62 -25
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +68 -36
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +67 -46
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +33 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +33 -4
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +33 -4
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +33 -4
- azure/ai/evaluation/_evaluators/_eci/_eci.py +7 -5
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +22 -21
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +51 -16
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -7
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +130 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +57 -0
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +96 -0
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +120 -0
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +96 -0
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +96 -0
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +96 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +46 -13
- azure/ai/evaluation/_evaluators/_qa/_qa.py +11 -6
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +23 -20
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +126 -80
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +2 -2
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +150 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +32 -15
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +36 -10
- azure/ai/evaluation/_exceptions.py +26 -6
- azure/ai/evaluation/_http_utils.py +203 -132
- azure/ai/evaluation/_model_configurations.py +23 -6
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +5 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +88 -60
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
- azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +98 -95
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
- azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -9
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +222 -169
- azure/ai/evaluation/simulator/_tracing.py +4 -4
- azure/ai/evaluation/simulator/_utils.py +6 -6
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/METADATA +237 -52
- azure_ai_evaluation-1.0.0b5.dist-info/NOTICE.txt +70 -0
- azure_ai_evaluation-1.0.0b5.dist-info/RECORD +120 -0
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
- azure_ai_evaluation-1.0.0b3.dist-info/RECORD +0 -98
- {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
import os
|
|
5
|
+
import types
|
|
6
|
+
from typing import Optional, Type
|
|
7
|
+
|
|
8
|
+
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TargetRunContext:
    """Context manager for target batch run.

    When ``upload_snapshot`` is false, the ``PF_FLOW_ENTRY_IN_TMP`` environment
    variable is set to ``"true"`` for the duration of the ``with`` block. On
    exit the variable is restored to the value it had on entry, or removed if
    it was not set, so a value configured by the caller (or by an enclosing
    context) is not lost.

    :param upload_snapshot: Whether to upload target snapshot.
    :type upload_snapshot: bool
    """

    def __init__(self, upload_snapshot: bool) -> None:
        self._upload_snapshot = upload_snapshot
        # Value of PF_FLOW_ENTRY_IN_TMP observed on entry; None means "was unset".
        self._previous_flag: Optional[str] = None

    def __enter__(self) -> None:
        """Set ``PF_FLOW_ENTRY_IN_TMP`` unless the snapshot should be uploaded.

        :return: Nothing; ``with ... as x`` binds ``x`` to ``None``.
        :rtype: None
        """
        # Address "[WinError 32] The process cannot access the file" error,
        # caused by conflicts when the venv and target function are in the same directory.
        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
        if not self._upload_snapshot:
            # Remember what was there so __exit__ can put it back.
            self._previous_flag = os.environ.get(PF_FLOW_ENTRY_IN_TMP)
            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[types.TracebackType],
    ) -> None:
        """Undo the environment change made by ``__enter__``.

        Exceptions raised inside the ``with`` block are never suppressed.

        :param exc_type: The exception type, if any.
        :type exc_type: Optional[Type[BaseException]]
        :param exc_value: The exception value, if any.
        :type exc_value: Optional[BaseException]
        :param exc_tb: The exception traceback, if any.
        :type exc_tb: Optional[types.TracebackType]
        """
        if self._upload_snapshot:
            # Nothing was changed on entry, so there is nothing to undo.
            return

        if self._previous_flag is None:
            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
        else:
            os.environ[PF_FLOW_ENTRY_IN_TMP] = self._previous_flag
        self._previous_flag = None
|
|
@@ -10,16 +10,18 @@ import posixpath
|
|
|
10
10
|
import time
|
|
11
11
|
import types
|
|
12
12
|
import uuid
|
|
13
|
-
from typing import Any, Dict, Optional, Set, Type
|
|
13
|
+
from typing import Any, Dict, List, Optional, Set, Type
|
|
14
14
|
from urllib.parse import urlparse
|
|
15
15
|
|
|
16
16
|
from promptflow._sdk.entities import Run
|
|
17
|
+
from typing_extensions import Self
|
|
17
18
|
|
|
18
19
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
19
20
|
from azure.ai.evaluation._http_utils import get_http_client
|
|
20
21
|
from azure.ai.evaluation._version import VERSION
|
|
21
22
|
from azure.core.pipeline.policies import RetryPolicy
|
|
22
23
|
from azure.core.rest import HttpResponse
|
|
24
|
+
from azure.core.exceptions import HttpResponseError
|
|
23
25
|
|
|
24
26
|
LOGGER = logging.getLogger(__name__)
|
|
25
27
|
|
|
@@ -27,6 +29,7 @@ LOGGER = logging.getLogger(__name__)
|
|
|
27
29
|
# Handle optional import. The azure libraries are only present if
|
|
28
30
|
# promptflow-azure is installed.
|
|
29
31
|
try:
|
|
32
|
+
from azure.ai.ml import MLClient
|
|
30
33
|
from azure.ai.ml.entities._credentials import AccountKeyConfiguration # pylint: disable=ungrouped-imports
|
|
31
34
|
from azure.ai.ml.entities._datastore.datastore import Datastore
|
|
32
35
|
from azure.storage.blob import BlobServiceClient
|
|
@@ -121,8 +124,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
121
124
|
self._run_name = run_name
|
|
122
125
|
self._promptflow_run = promptflow_run
|
|
123
126
|
self._status = RunStatus.NOT_STARTED
|
|
124
|
-
self._url_base = None
|
|
125
|
-
self.info = None
|
|
127
|
+
self._url_base: Optional[str] = None
|
|
128
|
+
self._info: Optional[RunInfo] = None
|
|
126
129
|
|
|
127
130
|
@property
|
|
128
131
|
def status(self) -> RunStatus:
|
|
@@ -134,6 +137,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
134
137
|
"""
|
|
135
138
|
return self._status
|
|
136
139
|
|
|
140
|
+
@property
def info(self) -> RunInfo:
    """Return the run information stored on this instance.

    :return: The stored run information.
    :rtype: RunInfo
    :raises EvaluationException: If no run information has been set.
    """
    run_info = self._info
    if run_info is not None:
        return run_info

    # The same text is used for the user-facing and the internal message.
    error_text = "Run info is missing"
    raise EvaluationException(
        message=error_text,
        internal_message=error_text,
        target=ErrorTarget.EVAL_RUN,
        category=ErrorCategory.UNKNOWN,
        blame=ErrorBlame.UNKNOWN,
    )
|
|
153
|
+
|
|
137
154
|
def _get_scope(self) -> str:
|
|
138
155
|
"""
|
|
139
156
|
Return the scope information for the workspace.
|
|
@@ -161,11 +178,11 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
161
178
|
)
|
|
162
179
|
self._url_base = None
|
|
163
180
|
self._status = RunStatus.BROKEN
|
|
164
|
-
self.info = RunInfo.generate(self._run_name)
|
|
181
|
+
self._info = RunInfo.generate(self._run_name)
|
|
165
182
|
else:
|
|
166
183
|
self._url_base = urlparse(self._tracking_uri).netloc
|
|
167
184
|
if self._promptflow_run is not None:
|
|
168
|
-
self.info = RunInfo(
|
|
185
|
+
self._info = RunInfo(
|
|
169
186
|
self._promptflow_run.name,
|
|
170
187
|
self._promptflow_run._experiment_name, # pylint: disable=protected-access
|
|
171
188
|
self._promptflow_run.name,
|
|
@@ -182,7 +199,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
182
199
|
body["run_name"] = self._run_name
|
|
183
200
|
response = self.request_with_retry(url=url, method="POST", json_dict=body)
|
|
184
201
|
if response.status_code != 200:
|
|
185
|
-
self.info = RunInfo.generate(self._run_name)
|
|
202
|
+
self._info = RunInfo.generate(self._run_name)
|
|
186
203
|
LOGGER.warning(
|
|
187
204
|
"The run failed to start: %s: %s."
|
|
188
205
|
"The results will be saved locally, but will not be logged to Azure.",
|
|
@@ -192,7 +209,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
192
209
|
self._status = RunStatus.BROKEN
|
|
193
210
|
else:
|
|
194
211
|
parsed_response = response.json()
|
|
195
|
-
self.info = RunInfo(
|
|
212
|
+
self._info = RunInfo(
|
|
196
213
|
run_id=parsed_response["run"]["info"]["run_id"],
|
|
197
214
|
experiment_id=parsed_response["run"]["info"]["experiment_id"],
|
|
198
215
|
run_name=parsed_response["run"]["info"]["run_name"],
|
|
@@ -235,7 +252,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
235
252
|
LOGGER.warning("Unable to terminate the run.")
|
|
236
253
|
self._status = RunStatus.TERMINATED
|
|
237
254
|
|
|
238
|
-
def __enter__(self):
|
|
255
|
+
def __enter__(self) -> Self:
|
|
239
256
|
"""The Context Manager enter call.
|
|
240
257
|
|
|
241
258
|
:return: The instance of the class.
|
|
@@ -249,7 +266,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
249
266
|
exc_type: Optional[Type[BaseException]],
|
|
250
267
|
exc_value: Optional[BaseException],
|
|
251
268
|
exc_tb: Optional[types.TracebackType],
|
|
252
|
-
) ->
|
|
269
|
+
) -> None:
|
|
253
270
|
"""The context manager exit call.
|
|
254
271
|
|
|
255
272
|
:param exc_type: The exception type
|
|
@@ -408,7 +425,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
408
425
|
return
|
|
409
426
|
# First we will list the files and the appropriate remote paths for them.
|
|
410
427
|
root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
|
|
411
|
-
remote_paths = {"paths": []}
|
|
428
|
+
remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
|
|
412
429
|
local_paths = []
|
|
413
430
|
# Go over the artifact folder and upload all artifacts.
|
|
414
431
|
for root, _, filenames in os.walk(artifact_folder):
|
|
@@ -427,10 +444,26 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
427
444
|
datastore = self._ml_client.datastores.get_default(include_secrets=True)
|
|
428
445
|
account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
|
|
429
446
|
svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
447
|
+
try:
|
|
448
|
+
for local, remote in zip(local_paths, remote_paths["paths"]):
|
|
449
|
+
blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
|
|
450
|
+
with open(local, "rb") as fp:
|
|
451
|
+
blob_client.upload_blob(fp, overwrite=True)
|
|
452
|
+
except HttpResponseError as ex:
|
|
453
|
+
if ex.status_code == 403:
|
|
454
|
+
msg = (
|
|
455
|
+
"Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
|
|
456
|
+
" Please ensure that the necessary access rights are granted."
|
|
457
|
+
)
|
|
458
|
+
raise EvaluationException(
|
|
459
|
+
message=msg,
|
|
460
|
+
target=ErrorTarget.EVAL_RUN,
|
|
461
|
+
category=ErrorCategory.FAILED_REMOTE_TRACKING,
|
|
462
|
+
blame=ErrorBlame.USER_ERROR,
|
|
463
|
+
tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
|
|
464
|
+
) from ex
|
|
465
|
+
|
|
466
|
+
raise ex
|
|
434
467
|
|
|
435
468
|
# To show artifact in UI we will need to register it. If it is a promptflow run,
|
|
436
469
|
# we are rewriting already registered artifact and need to skip this step.
|