azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.0b5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of azure-ai-evaluation has been flagged as a potentially problematic release.
- azure/ai/evaluation/__init__.py +22 -0
- azure/ai/evaluation/_common/constants.py +5 -0
- azure/ai/evaluation/_common/math.py +11 -0
- azure/ai/evaluation/_common/rai_service.py +172 -35
- azure/ai/evaluation/_common/utils.py +162 -23
- azure/ai/evaluation/_constants.py +6 -6
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +4 -4
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +6 -3
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +35 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +21 -4
- azure/ai/evaluation/_evaluate/_evaluate.py +267 -139
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -5
- azure/ai/evaluation/_evaluate/_utils.py +40 -7
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +14 -9
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +20 -19
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +18 -8
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +48 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +56 -19
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +5 -5
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +30 -1
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +30 -1
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +30 -1
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +30 -1
- azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -1
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +20 -20
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +49 -15
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -7
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +130 -0
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +57 -0
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +96 -0
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +120 -0
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +96 -0
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +96 -0
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +96 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +44 -11
- azure/ai/evaluation/_evaluators/_qa/_qa.py +7 -3
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -19
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +125 -82
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +2 -2
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +150 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +17 -14
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +32 -5
- azure/ai/evaluation/_exceptions.py +17 -0
- azure/ai/evaluation/_model_configurations.py +18 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +5 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +4 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +79 -34
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -1
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
- azure/ai/evaluation/simulator/_simulator.py +115 -61
- azure/ai/evaluation/simulator/_utils.py +6 -6
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/METADATA +166 -9
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/NOTICE.txt +20 -0
- azure_ai_evaluation-1.0.0b5.dist-info/RECORD +120 -0
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
- /azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +0 -0
- /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
- {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_common/utils.py

@@ -2,13 +2,14 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

+import re
 import math
 import threading
-from typing import Any, List, Literal, Mapping, Type, TypeVar, Union, cast, get_args, get_origin
+from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin

 import nltk
 from typing_extensions import NotRequired, Required, TypeGuard
-
+from promptflow.core._errors import MissingRequiredPackage
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import (
@@ -24,7 +25,7 @@ _nltk_data_download_lock = threading.Lock()
 T_TypedDict = TypeVar("T_TypedDict", bound=Mapping[Any, Any])


-def get_harm_severity_level(harm_score: int) -> Union[str, float]:
+def get_harm_severity_level(harm_score: Union[float, int]) -> Union[str, float]:
     """Generate harm severity level based on harm score.

     :param harm_score: The harm score to be evaluated.
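The widened signature above lets callers pass fractional harm scores (for example, averaged sub-scores) without casting. A minimal usage sketch, assuming the private module path shown in this diff; the concrete label strings are defined by the package's severity constants and are only indicative here:

import math

from azure.ai.evaluation._common.utils import get_harm_severity_level

label_int = get_harm_severity_level(6)      # a severity label such as "High"
label_float = get_harm_severity_level(3.5)  # floats are now accepted directly

# Per the Union[str, float] return type, the result is either a label string
# or NaN (e.g. for out-of-range scores).
for value in (label_int, label_float):
    assert isinstance(value, str) or math.isnan(value)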
@@ -48,13 +49,19 @@ def get_harm_severity_level(harm_score: int) -> Union[str, float]:

 def ensure_nltk_data_downloaded():
     """Download NLTK data packages if not already downloaded."""
+    nltk_data = [
+        ("wordnet", "corpora/wordnet.zip"),
+        ("perluniprops", "misc/perluniprops.zip"),
+        ("punkt", "tokenizers/punkt.zip"),
+        ("punkt_tab", "tokenizers/punkt_tab.zip"),
+    ]
+
     with _nltk_data_download_lock:
-
-
-
-
-
-        nltk.download("punkt_tab")
+        for _id, resource_name in nltk_data:
+            try:
+                nltk.find(resource_name)
+            except LookupError:
+                nltk.download(_id)


 def nltk_tokenize(text: str) -> List[str]:
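The rewritten helper now probes for each NLTK resource with nltk.find and downloads it only when LookupError is raised, instead of unconditionally calling nltk.download. A standalone sketch of the same check-then-download pattern, using the resource names listed in the hunk:

import nltk

def download_if_missing(package_id: str, resource_path: str) -> None:
    """Fetch an NLTK resource only if it is not already available locally."""
    try:
        nltk.find(resource_path)  # raises LookupError when the resource is absent
    except LookupError:
        nltk.download(package_id)

download_if_missing("punkt", "tokenizers/punkt.zip")
download_if_missing("wordnet", "corpora/wordnet.zip")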
@@ -122,24 +129,23 @@ def validate_azure_ai_project(o: object) -> AzureAIProject:
     fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}

     if not isinstance(o, dict):
-        msg = "azure_ai_project must be a dictionary"
+        msg = "The 'azure_ai_project' parameter must be a dictionary."
         raise EvaluationException(
             message=msg,
-
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )

     missing_fields = set(fields.keys()) - o.keys()

     if missing_fields:
-        msg =
+        msg = (
+            "The 'azure_ai_project' dictionary is missing the following required "
+            f"field(s): {', '.join(f'{field}' for field in missing_fields)}."
+        )
         raise EvaluationException(
             message=msg,
-
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )

@@ -147,13 +153,10 @@ def validate_azure_ai_project(o: object) -> AzureAIProject:
         if isinstance(o[field_name], expected_type):
             continue

-        msg = f"
-
+        msg = f"Invalid type for field '{field_name}'. Expected {expected_type}, but got {type(o[field_name])}."
         raise EvaluationException(
-            message=
-
-            target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-            category=ErrorCategory.MISSING_FIELD,
+            message=msg,
+            category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
         )

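The two hunks above reword the validation errors and reclassify them as ErrorCategory.INVALID_VALUE. A sketch of the dictionary shape that passes validation; the IDs and names are placeholders:

from azure.ai.evaluation._common.utils import validate_azure_ai_project

azure_ai_project = {
    "subscription_id": "00000000-0000-0000-0000-000000000000",  # placeholder
    "resource_group_name": "my-resource-group",                 # placeholder
    "project_name": "my-ai-project",                            # placeholder
}

# Raises EvaluationException (category INVALID_VALUE) if the input is not a dict,
# a required field is missing, or a field has the wrong type.
validated = validate_azure_ai_project(azure_ai_project)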
@@ -270,3 +273,139 @@ def _validate_typed_dict(o: object, t: Type[T_TypedDict]) -> T_TypedDict:
         validate_annotation(v, annotations[k])

     return cast(T_TypedDict, o)
+
+
+def parse_quality_evaluator_reason_score(llm_output: str) -> Tuple[float, str]:
+    """Parse the output of prompt-based quality evaluators that return a score and reason.
+
+    Current supported evaluators:
+        - Fluency
+        - Relevance
+        - Retrieval
+        - Groundedness
+        - Coherence
+
+    :param llm_output: The output of the prompt-based quality evaluator.
+    :type llm_output: str
+    :return: The score and reason.
+    :rtype: Tuple[float, str]
+    """
+    score = math.nan
+    reason = ""
+    if llm_output:
+        score_pattern = r"<S2>(.*?)</S2>"
+        reason_pattern = r"<S1>(.*?)</S1>"
+        score_match = re.findall(score_pattern, llm_output, re.DOTALL)
+        reason_match = re.findall(reason_pattern, llm_output, re.DOTALL)
+        if score_match:
+            score = float(score_match[0].strip())
+        if reason_match:
+            reason = reason_match[0].strip()
+
+    return score, reason
+
+
+def remove_optional_singletons(eval_class, singletons):
+    required_singletons = singletons.copy()
+    if hasattr(eval_class, "_OPTIONAL_PARAMS"):  # pylint: disable=protected-access
+        for param in eval_class._OPTIONAL_PARAMS:  # pylint: disable=protected-access
+            if param in singletons:
+                del required_singletons[param]
+    return required_singletons
+
+
+def retrieve_content_type(assistant_messages: List, metric: str) -> str:
+    """Get the content type for service payload.
+
+    :param assistant_messages: The list of messages to be annotated by evaluation service
+    :type assistant_messages: list
+    :param metric: A string representing the metric type
+    :type metric: str
+    :return: A text representing the content type. Example: 'text', or 'image'
+    :rtype: str
+    """
+    # Check if metric is "protected_material"
+    if metric == "protected_material":
+        return "image"
+
+    # Iterate through each message
+    for item in assistant_messages:
+        # Ensure "content" exists in the message and is iterable
+        content = item.get("content", [])
+        for message in content:
+            if message.get("type", "") == "image_url":
+                return "image"
+    # Default return if no image was found
+    return "text"
+
+
+def validate_conversation(conversation):
+    def raise_exception(msg, target):
+        raise EvaluationException(
+            message=msg,
+            internal_message=msg,
+            target=target,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+
+    if not conversation or "messages" not in conversation:
+        raise_exception(
+            "Attribute 'messages' is missing in the request",
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
+        )
+    messages = conversation["messages"]
+    if not isinstance(messages, list):
+        raise_exception(
+            "'messages' parameter must be a JSON-compatible list of chat messages",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
+    expected_roles = {"user", "assistant", "system"}
+    image_found = False
+    for num, message in enumerate(messages, 1):
+        if not isinstance(message, dict):
+            try:
+                from azure.ai.inference.models import (
+                    ChatRequestMessage,
+                    UserMessage,
+                    AssistantMessage,
+                    SystemMessage,
+                    ImageContentItem,
+                )
+            except ImportError as ex:
+                raise MissingRequiredPackage(
+                    message="Please install 'azure-ai-inference' package to use SystemMessage, AssistantMessage"
+                ) from ex
+
+            if isinstance(messages[0], ChatRequestMessage) and not isinstance(
+                message, (UserMessage, AssistantMessage, SystemMessage)
+            ):
+                raise_exception(
+                    f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
+                    ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+                )
+
+            if isinstance(message.content, list) and any(
+                isinstance(item, ImageContentItem) for item in message.content
+            ):
+                image_found = True
+            continue
+        if message.get("role") not in expected_roles:
+            raise_exception(
+                f"Invalid role provided: {message.get('role')}. Message number: {num}",
+                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            )
+        content = message.get("content")
+        if not isinstance(content, (str, list)):
+            raise_exception(
+                f"Content in each turn must be a string or array. Message number: {num}",
+                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            )
+        if isinstance(content, list):
+            if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
+                image_found = True
+    if not image_found:
+        raise_exception(
+            "Message needs to have multi-modal input like images.",
+            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+        )
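The new parse_quality_evaluator_reason_score helper expects the prompty output to carry the reasoning in <S1>…</S1> tags and the numeric score in <S2>…</S2> tags. A small sketch with a made-up LLM output string:

from azure.ai.evaluation._common.utils import parse_quality_evaluator_reason_score

llm_output = (
    "<S1>The response is well structured and directly answers the question.</S1>"
    "<S2>4</S2>"
)

score, reason = parse_quality_evaluator_reason_score(llm_output)
assert score == 4.0
assert reason.startswith("The response is well structured")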
azure/ai/evaluation/_constants.py

@@ -7,13 +7,13 @@ from typing import Literal
 class EvaluationMetrics:
     """Metrics for model evaluation."""

-
-
-
-
-
+    GROUNDEDNESS = "groundedness"
+    RELEVANCE = "relevance"
+    COHERENCE = "coherence"
+    FLUENCY = "fluency"
+    SIMILARITY = "similarity"
     F1_SCORE = "f1_score"
-
+    RETRIEVAL_SCORE = "retrieval_score"
     HATE_FAIRNESS = "hate_fairness"
     HATE_UNFAIRNESS = "hate_unfairness"
     VIOLENCE = "violence"
azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py

@@ -1,8 +1,9 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from .
+from .eval_run_context import EvalRunContext
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
+from .target_run_context import TargetRunContext

-__all__ = ["CodeClient", "ProxyClient", "
+__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py}

@@ -22,13 +22,13 @@ from .code_client import CodeClient
 from .proxy_client import ProxyClient


-class
-    """Context manager for batch run
+class EvalRunContext:
+    """Context manager for eval batch run.

     :param client: The client to run in the context.
     :type client: Union[
-        ~azure.ai.evaluation._evaluate.
-        ~azure.ai.evaluation._evaluate.
+        ~azure.ai.evaluation._evaluate._batch_run.code_client.CodeClient,
+        ~azure.ai.evaluation._evaluate._batch_run.proxy_client.ProxyClient
     ]
     """

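After the rename, the batch-run helpers are imported from _batch_run and the context manager is called EvalRunContext. A hedged sketch of wiring it to a ProxyClient; constructing ProxyClient directly from a PFClient is an assumption based on the imports in this diff, not copied from the package:

from promptflow.client import PFClient

from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, ProxyClient

pf_client = PFClient()
client = ProxyClient(pf_client)  # assumed constructor; see note above

# Runs submitted while the context is active execute with the
# EvalRunContext-managed environment applied.
with EvalRunContext(client):
    pass  # client.run(...) calls would go here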
azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py

@@ -1,13 +1,16 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
 import inspect
 import logging
 import math
 import os
+from collections import OrderedDict
 from concurrent.futures import Future
 from typing import Any, Callable, Dict, Optional, Union
-from collections import OrderedDict

 import pandas as pd
 from promptflow.client import PFClient
@@ -37,7 +40,7 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
         **kwargs
     ) -> ProxyRun:
         flow_to_run = flow
-        if hasattr(flow, "_to_async"):
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
             flow_to_run = flow._to_async()  # pylint: disable=protected-access

         batch_use_async = self._should_batch_use_async(flow_to_run)
@@ -77,7 +80,7 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword

     @staticmethod
     def _should_batch_use_async(flow):
-        if os.getenv("
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
             if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
                 return True
             if inspect.iscoroutinefunction(flow):
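Both proxy_client.py hunks gate the async batch path behind the AI_EVALS_BATCH_USE_ASYNC environment variable, which defaults to "true". A sketch of opting out before an evaluation run; the dataset path is a placeholder:

import os

from azure.ai.evaluation import F1ScoreEvaluator, evaluate

# Any value other than "true" (case-insensitive) disables the async conversion.
os.environ["AI_EVALS_BATCH_USE_ASYNC"] = "false"

results = evaluate(
    data="eval_data.jsonl",  # placeholder path to a JSONL dataset
    evaluators={"f1_score": F1ScoreEvaluator()},
)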
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py (new file)

@@ -0,0 +1,35 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+import types
+from typing import Optional, Type
+
+from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+
+
+class TargetRunContext:
+    """Context manager for target batch run.
+
+    :param upload_snapshot: Whether to upload target snapshot.
+    :type upload_snapshot: bool
+    """
+
+    def __init__(self, upload_snapshot: bool) -> None:
+        self._upload_snapshot = upload_snapshot
+
+    def __enter__(self) -> None:
+        # Address "[WinError 32] The process cannot access the file" error,
+        # caused by conflicts when the venv and target function are in the same directory.
+        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+        if not self._upload_snapshot:
+            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        if not self._upload_snapshot:
+            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
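TargetRunContext sets promptflow's PF_FLOW_ENTRY_IN_TMP flag only when the target snapshot is not uploaded, and removes it again on exit. A minimal usage sketch; the target callable is a stand-in:

from azure.ai.evaluation._evaluate._batch_run import TargetRunContext

def my_target(query: str) -> dict:
    """Stand-in for an application target invoked during evaluation."""
    return {"response": f"echo: {query}"}

# Inside the context only the flex entry file (flow.flex.yaml) is uploaded,
# avoiding the Windows file-lock conflict described in the comments above.
with TargetRunContext(upload_snapshot=False):
    output = my_target("What is the capital of France?")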
azure/ai/evaluation/_evaluate/_eval_run.py

@@ -21,6 +21,7 @@ from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError

 LOGGER = logging.getLogger(__name__)

@@ -443,10 +444,26 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-
-
-
-
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex

         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.