azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +9 -0
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
- azure/ai/evaluation/_aoai/label_grader.py +66 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
- azure/ai/evaluation/_azure/_clients.py +4 -4
- azure/ai/evaluation/_azure/_envs.py +208 -0
- azure/ai/evaluation/_azure/_token_manager.py +12 -7
- azure/ai/evaluation/_common/__init__.py +5 -0
- azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +139 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +158 -28
- azure/ai/evaluation/_common/raiclient/_version.py +1 -1
- azure/ai/evaluation/_common/utils.py +79 -1
- azure/ai/evaluation/_constants.py +16 -0
- azure/ai/evaluation/_eval_mapping.py +71 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +325 -74
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
- azure/ai/evaluation/_evaluate/_utils.py +117 -4
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +8 -3
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +7 -2
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
- azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +825 -450
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
- azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA +25 -2
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/RECORD +123 -65
- /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
-
|
|
4
|
+
import os
|
|
5
|
+
import posixpath
|
|
5
6
|
import re
|
|
6
7
|
import math
|
|
7
8
|
import threading
|
|
8
9
|
from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin
|
|
9
10
|
|
|
10
11
|
import nltk
|
|
12
|
+
from azure.storage.blob import ContainerClient
|
|
11
13
|
from typing_extensions import NotRequired, Required, TypeGuard
|
|
12
14
|
from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
|
|
13
15
|
from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
|
|
@@ -124,10 +126,25 @@ def construct_prompty_model_config(
|
|
|
124
126
|
|
|
125
127
|
return prompty_model_config
|
|
126
128
|
|
|
129
|
+
def is_onedp_project(azure_ai_project: AzureAIProject) -> bool:
|
|
130
|
+
"""Check if the Azure AI project is a OneDP project.
|
|
131
|
+
|
|
132
|
+
:param azure_ai_project: The scope of the Azure AI project.
|
|
133
|
+
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
134
|
+
:return: True if the Azure AI project is a OneDP project, False otherwise.
|
|
135
|
+
:rtype: bool
|
|
136
|
+
"""
|
|
137
|
+
if isinstance(azure_ai_project, str):
|
|
138
|
+
return True
|
|
139
|
+
return False
|
|
127
140
|
|
|
128
141
|
def validate_azure_ai_project(o: object) -> AzureAIProject:
|
|
129
142
|
fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}
|
|
130
143
|
|
|
144
|
+
# TODO: Add a regex check for a malformed project URI
|
|
145
|
+
if is_onedp_project(o):
|
|
146
|
+
return o
|
|
147
|
+
|
|
131
148
|
if not isinstance(o, dict):
|
|
132
149
|
msg = "The 'azure_ai_project' parameter must be a dictionary."
|
|
133
150
|
raise EvaluationException(
|
|
@@ -463,3 +480,64 @@ def validate_conversation(conversation):
|
|
|
463
480
|
"User and assistant role expected as the only role in each message.",
|
|
464
481
|
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
465
482
|
)
|
|
483
|
+
|
|
484
|
+
def upload(path: str, container_client: ContainerClient, logger=None):
|
|
485
|
+
"""Upload files or directories to Azure Blob Storage using a container client.
|
|
486
|
+
|
|
487
|
+
This function uploads a file or all files in a directory (recursively) to Azure Blob Storage.
|
|
488
|
+
When uploading a directory, the relative path structure is preserved in the blob container.
|
|
489
|
+
|
|
490
|
+
:param path: The local path to a file or directory to upload
|
|
491
|
+
:type path: str
|
|
492
|
+
:param container_client: The Azure Blob Container client to use for uploading
|
|
493
|
+
:type container_client: azure.storage.blob.ContainerClient
|
|
494
|
+
:param logger: Optional logger for debug output, defaults to None
|
|
495
|
+
:type logger: logging.Logger, optional
|
|
496
|
+
:raises EvaluationException: If the path doesn't exist or errors occur during upload
|
|
497
|
+
"""
|
|
498
|
+
|
|
499
|
+
if not os.path.isdir(path) and not os.path.isfile(path):
|
|
500
|
+
raise EvaluationException(
|
|
501
|
+
message=f"Path '{path}' is not a directory or a file",
|
|
502
|
+
internal_message=f"Path '{path}' is not a directory or a file",
|
|
503
|
+
target=ErrorTarget.RAI_CLIENT,
|
|
504
|
+
category=ErrorCategory.INVALID_VALUE,
|
|
505
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
506
|
+
)
|
|
507
|
+
|
|
508
|
+
remote_paths = []
|
|
509
|
+
local_paths = []
|
|
510
|
+
|
|
511
|
+
if os.path.isdir(path):
|
|
512
|
+
for (root, _, filenames) in os.walk(path):
|
|
513
|
+
upload_path = ""
|
|
514
|
+
if root != path:
|
|
515
|
+
rel_path = os.path.relpath(root, path)
|
|
516
|
+
upload_path = posixpath.join(rel_path)
|
|
517
|
+
for f in filenames:
|
|
518
|
+
remote_file_path = posixpath.join(upload_path, f)
|
|
519
|
+
remote_paths.append(remote_file_path)
|
|
520
|
+
local_file_path = os.path.join(root, f)
|
|
521
|
+
local_paths.append(local_file_path)
|
|
522
|
+
|
|
523
|
+
if os.path.isfile(path):
|
|
524
|
+
remote_paths = [os.path.basename(path)]
|
|
525
|
+
local_paths = [path]
|
|
526
|
+
|
|
527
|
+
try:
|
|
528
|
+
# Open the file in binary read mode
|
|
529
|
+
for local, remote in zip(local_paths, remote_paths):
|
|
530
|
+
with open(local, "rb") as data:
|
|
531
|
+
# Upload the file to Azure Blob Storage
|
|
532
|
+
container_client.upload_blob(data=data, name=remote)
|
|
533
|
+
if logger:
|
|
534
|
+
logger.debug(f"File '{local}' uploaded successfully")
|
|
535
|
+
|
|
536
|
+
except Exception as e:
|
|
537
|
+
raise EvaluationException(
|
|
538
|
+
message=f"Error uploading file: {e}",
|
|
539
|
+
internal_message=f"Error uploading file: {e}",
|
|
540
|
+
target=ErrorTarget.RAI_CLIENT,
|
|
541
|
+
category=ErrorCategory.UPLOAD_ERROR,
|
|
542
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
543
|
+
)
|
|
@@ -62,6 +62,8 @@ class EvaluationRunProperties:
|
|
|
62
62
|
RUN_TYPE = "runType"
|
|
63
63
|
EVALUATION_RUN = "_azureml.evaluation_run"
|
|
64
64
|
EVALUATION_SDK = "_azureml.evaluation_sdk_name"
|
|
65
|
+
NAME_MAP = "_azureml.evaluation_name_map"
|
|
66
|
+
NAME_MAP_LENGTH = "_azureml.evaluation_name_map_length"
|
|
65
67
|
|
|
66
68
|
|
|
67
69
|
@experimental
|
|
@@ -79,6 +81,13 @@ class _AggregationType(enum.Enum):
|
|
|
79
81
|
SUM = "sum"
|
|
80
82
|
CUSTOM = "custom"
|
|
81
83
|
|
|
84
|
+
class TokenScope(str, enum.Enum):
|
|
85
|
+
"""Defines the scope of the token used to access Azure resources."""
|
|
86
|
+
|
|
87
|
+
DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
|
|
88
|
+
COGNITIVE_SERVICES_MANAGEMENT = "https://ai.azure.com/.default"
|
|
89
|
+
AZURE_ML = "https://ml.azure.com/.default"
|
|
90
|
+
|
|
82
91
|
|
|
83
92
|
DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
|
|
84
93
|
|
|
@@ -99,3 +108,10 @@ EVALUATION_PASS_FAIL_MAPPING = {
|
|
|
99
108
|
True: "pass",
|
|
100
109
|
False: "fail",
|
|
101
110
|
}
|
|
111
|
+
|
|
112
|
+
DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS = 60000
|
|
113
|
+
BINARY_AGGREGATE_SUFFIX = "binary_aggregate"
|
|
114
|
+
|
|
115
|
+
AOAI_COLUMN_NAME = "aoai"
|
|
116
|
+
DEFAULT_OAI_EVAL_RUN_NAME = "AI_SDK_EVAL_RUN"
|
|
117
|
+
DEFAULT_AOAI_API_VERSION = "2025-04-01-preview" # Unfortunately relying on preview version for now.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Note: This was removed from the normal constants file due to circular import issues.
|
|
6
|
+
|
|
7
|
+
# In the future, it would be nice to instead rely on the id value
|
|
8
|
+
# of each eval class, but I wouldn't like to rely on those before
|
|
9
|
+
# we simplify them into version-less, static values, instead of the
|
|
10
|
+
# problematic registry references they currently are.
|
|
11
|
+
|
|
12
|
+
# Import all evals
|
|
13
|
+
from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
|
|
14
|
+
from azure.ai.evaluation import (
|
|
15
|
+
BleuScoreEvaluator,
|
|
16
|
+
CodeVulnerabilityEvaluator,
|
|
17
|
+
CoherenceEvaluator,
|
|
18
|
+
ContentSafetyEvaluator,
|
|
19
|
+
F1ScoreEvaluator,
|
|
20
|
+
FluencyEvaluator,
|
|
21
|
+
GleuScoreEvaluator,
|
|
22
|
+
GroundednessEvaluator,
|
|
23
|
+
GroundednessProEvaluator,
|
|
24
|
+
HateUnfairnessEvaluator,
|
|
25
|
+
IndirectAttackEvaluator,
|
|
26
|
+
IntentResolutionEvaluator,
|
|
27
|
+
MeteorScoreEvaluator,
|
|
28
|
+
ProtectedMaterialEvaluator,
|
|
29
|
+
QAEvaluator,
|
|
30
|
+
RelevanceEvaluator,
|
|
31
|
+
ResponseCompletenessEvaluator,
|
|
32
|
+
RetrievalEvaluator,
|
|
33
|
+
RougeScoreEvaluator,
|
|
34
|
+
SelfHarmEvaluator,
|
|
35
|
+
SexualEvaluator,
|
|
36
|
+
SimilarityEvaluator,
|
|
37
|
+
TaskAdherenceEvaluator,
|
|
38
|
+
ToolCallAccuracyEvaluator,
|
|
39
|
+
UngroundedAttributesEvaluator,
|
|
40
|
+
ViolenceEvaluator
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
EVAL_CLASS_MAP = {
|
|
44
|
+
BleuScoreEvaluator: "bleu_score",
|
|
45
|
+
CodeVulnerabilityEvaluator: "code_vulnerability",
|
|
46
|
+
CoherenceEvaluator: "coherence",
|
|
47
|
+
ContentSafetyEvaluator: "content_safety",
|
|
48
|
+
ECIEvaluator: "eci",
|
|
49
|
+
F1ScoreEvaluator: "f1_score",
|
|
50
|
+
FluencyEvaluator: "fluency",
|
|
51
|
+
GleuScoreEvaluator: "gleu_score",
|
|
52
|
+
GroundednessEvaluator: "groundedness",
|
|
53
|
+
GroundednessProEvaluator: "groundedness_pro",
|
|
54
|
+
HateUnfairnessEvaluator: "hate_unfairness",
|
|
55
|
+
IndirectAttackEvaluator: "indirect_attack",
|
|
56
|
+
IntentResolutionEvaluator: "intent_resolution",
|
|
57
|
+
MeteorScoreEvaluator: "meteor_score",
|
|
58
|
+
ProtectedMaterialEvaluator: "protected_material",
|
|
59
|
+
QAEvaluator: "qa",
|
|
60
|
+
RelevanceEvaluator: "relevance",
|
|
61
|
+
ResponseCompletenessEvaluator: "response_completeness",
|
|
62
|
+
RetrievalEvaluator: "retrieval",
|
|
63
|
+
RougeScoreEvaluator: "rouge_score",
|
|
64
|
+
SelfHarmEvaluator: "self_harm",
|
|
65
|
+
SexualEvaluator: "sexual",
|
|
66
|
+
SimilarityEvaluator: "similarity",
|
|
67
|
+
TaskAdherenceEvaluator: "task_adherence",
|
|
68
|
+
ToolCallAccuracyEvaluator: "tool_call_accuracy",
|
|
69
|
+
UngroundedAttributesEvaluator: "ungrounded_attributes",
|
|
70
|
+
ViolenceEvaluator: "violence",
|
|
71
|
+
}
|
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
|
+
import asyncio
|
|
5
6
|
import logging
|
|
6
7
|
import pandas as pd
|
|
7
8
|
import sys
|
|
8
9
|
from collections import defaultdict
|
|
9
|
-
from concurrent.futures import Future
|
|
10
|
+
from concurrent.futures import Future
|
|
10
11
|
from os import PathLike
|
|
11
12
|
from typing import Any, Callable, Dict, Final, List, Mapping, Optional, Sequence, Union, cast
|
|
12
13
|
|
|
@@ -14,6 +15,8 @@ from .batch_clients import BatchClientRun, HasAsyncCallable
|
|
|
14
15
|
from ..._legacy._batch_engine._run_submitter import RunSubmitter
|
|
15
16
|
from ..._legacy._batch_engine._config import BatchEngineConfig
|
|
16
17
|
from ..._legacy._batch_engine._run import Run
|
|
18
|
+
from ..._legacy._adapters._constants import LINE_NUMBER
|
|
19
|
+
from ..._legacy._common._thread_pool_executor_with_context import ThreadPoolExecutorWithContext
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
LOGGER = logging.getLogger(__name__)
|
|
@@ -22,7 +25,9 @@ LOGGER = logging.getLogger(__name__)
|
|
|
22
25
|
class RunSubmitterClient:
|
|
23
26
|
def __init__(self, config: Optional[BatchEngineConfig] = None) -> None:
|
|
24
27
|
self._config = config or BatchEngineConfig(LOGGER, use_async=True)
|
|
25
|
-
self._thread_pool =
|
|
28
|
+
self._thread_pool = ThreadPoolExecutorWithContext(
|
|
29
|
+
thread_name_prefix="evaluators_thread",
|
|
30
|
+
max_workers=self._config.max_concurrency)
|
|
26
31
|
|
|
27
32
|
def run(
|
|
28
33
|
self,
|
|
@@ -33,30 +38,36 @@ class RunSubmitterClient:
|
|
|
33
38
|
**kwargs: Any,
|
|
34
39
|
) -> BatchClientRun:
|
|
35
40
|
if not isinstance(data, pd.DataFrame):
|
|
36
|
-
# Should never get here
|
|
37
41
|
raise ValueError("Data must be a pandas DataFrame")
|
|
38
|
-
if not column_mapping:
|
|
39
|
-
raise ValueError("Column mapping must be provided")
|
|
40
42
|
|
|
41
|
-
# The column mappings are
|
|
43
|
+
# The column mappings are indexed by data to indicate they come from the data
|
|
42
44
|
# input. Update the inputs so that each entry is a dictionary with a data key
|
|
43
45
|
# that contains the original input data.
|
|
44
46
|
inputs = [{"data": input_data} for input_data in data.to_dict(orient="records")]
|
|
45
47
|
|
|
46
|
-
#
|
|
48
|
+
# Pass the correct previous run to the evaluator
|
|
49
|
+
run: Optional[BatchClientRun] = kwargs.pop("run", None)
|
|
50
|
+
if run:
|
|
51
|
+
kwargs["run"] = self._get_run(run)
|
|
52
|
+
|
|
53
|
+
# Try to get async function to use
|
|
47
54
|
if isinstance(flow, HasAsyncCallable):
|
|
48
55
|
flow = flow._to_async() # pylint: disable=protected-access
|
|
49
56
|
|
|
50
|
-
|
|
57
|
+
# Start an event loop for async execution on a thread pool thread to separate it
|
|
58
|
+
# from the caller's thread.
|
|
59
|
+
run_submitter = RunSubmitter(self._config, self._thread_pool)
|
|
51
60
|
run_future = self._thread_pool.submit(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
61
|
+
asyncio.run,
|
|
62
|
+
run_submitter.submit(
|
|
63
|
+
dynamic_callable=flow,
|
|
64
|
+
inputs=inputs,
|
|
65
|
+
column_mapping=column_mapping,
|
|
66
|
+
name_prefix=evaluator_name,
|
|
67
|
+
created_on=kwargs.pop("created_on", None),
|
|
68
|
+
storage_creator=kwargs.pop("storage_creator", None),
|
|
69
|
+
**kwargs,
|
|
70
|
+
)
|
|
60
71
|
)
|
|
61
72
|
|
|
62
73
|
return run_future
|
|
@@ -75,7 +86,10 @@ class RunSubmitterClient:
|
|
|
75
86
|
key = f"{prefix}.{k}"
|
|
76
87
|
data[key].append(value)
|
|
77
88
|
|
|
89
|
+
# Go from a list of dictionaries (i.e. a row view of the data) to a dictionary of lists
|
|
90
|
+
# (i.e. a column view of the data)
|
|
78
91
|
_update("inputs", run.inputs)
|
|
92
|
+
_update("inputs", [{ LINE_NUMBER: i } for i in range(len(run.inputs)) ])
|
|
79
93
|
_update("outputs", run.outputs)
|
|
80
94
|
|
|
81
95
|
df = pd.DataFrame(data).reindex(columns=[k for k in data.keys()])
|
|
@@ -8,6 +8,10 @@ from typing import Optional, Type, Union
|
|
|
8
8
|
from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
|
|
9
9
|
from azure.ai.evaluation._legacy._adapters.utils import ClientUserAgentUtil
|
|
10
10
|
from azure.ai.evaluation._legacy._adapters.tracing import inject_openai_api, recover_openai_api
|
|
11
|
+
from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
|
|
12
|
+
inject_openai_api as ported_inject_openai_api,
|
|
13
|
+
recover_openai_api as ported_recover_openai_api,
|
|
14
|
+
)
|
|
11
15
|
|
|
12
16
|
from azure.ai.evaluation._constants import (
|
|
13
17
|
OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
|
|
@@ -68,6 +72,7 @@ class EvalRunContext:
|
|
|
68
72
|
|
|
69
73
|
if isinstance(self.client, RunSubmitterClient):
|
|
70
74
|
set_event_loop_policy()
|
|
75
|
+
ported_inject_openai_api()
|
|
71
76
|
|
|
72
77
|
def __exit__(
|
|
73
78
|
self,
|
|
@@ -92,3 +97,6 @@ class EvalRunContext:
|
|
|
92
97
|
if self._is_otel_timeout_set_by_system:
|
|
93
98
|
os.environ.pop(OTEL_EXPORTER_OTLP_TRACES_TIMEOUT, None)
|
|
94
99
|
self._is_otel_timeout_set_by_system = False
|
|
100
|
+
|
|
101
|
+
if isinstance(self.client, RunSubmitterClient):
|
|
102
|
+
ported_recover_openai_api()
|
|
@@ -58,6 +58,11 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
58
58
|
if not name:
|
|
59
59
|
name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
|
|
60
60
|
|
|
61
|
+
# Pass the correct previous run to the evaluator
|
|
62
|
+
run: Optional[BatchClientRun] = kwargs.pop("run", None)
|
|
63
|
+
if run:
|
|
64
|
+
kwargs["run"] = self.get_result(run)
|
|
65
|
+
|
|
61
66
|
batch_use_async = self._should_batch_use_async(flow_to_run)
|
|
62
67
|
eval_future = self._thread_pool.submit(
|
|
63
68
|
self._pf_client.run,
|
|
@@ -5,8 +5,15 @@ import os
|
|
|
5
5
|
import types
|
|
6
6
|
from typing import Optional, Type
|
|
7
7
|
|
|
8
|
+
from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClient
|
|
9
|
+
from azure.ai.evaluation._evaluate._batch_run import RunSubmitterClient
|
|
8
10
|
from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
|
|
11
|
+
from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
|
|
12
|
+
inject_openai_api as ported_inject_openai_api,
|
|
13
|
+
recover_openai_api as ported_recover_openai_api,
|
|
14
|
+
)
|
|
9
15
|
from azure.ai.evaluation._constants import PF_DISABLE_TRACING
|
|
16
|
+
from azure.ai.evaluation._evaluate._utils import set_event_loop_policy
|
|
10
17
|
|
|
11
18
|
|
|
12
19
|
class TargetRunContext:
|
|
@@ -16,7 +23,8 @@ class TargetRunContext:
|
|
|
16
23
|
:type upload_snapshot: bool
|
|
17
24
|
"""
|
|
18
25
|
|
|
19
|
-
def __init__(self, upload_snapshot: bool = False) -> None:
|
|
26
|
+
def __init__(self, client: BatchClient, upload_snapshot: bool = False) -> None:
|
|
27
|
+
self._client = client
|
|
20
28
|
self._upload_snapshot = upload_snapshot
|
|
21
29
|
self._original_cwd = os.getcwd()
|
|
22
30
|
|
|
@@ -32,6 +40,11 @@ class TargetRunContext:
|
|
|
32
40
|
|
|
33
41
|
os.environ[PF_DISABLE_TRACING] = "true"
|
|
34
42
|
|
|
43
|
+
if isinstance(self._client, RunSubmitterClient):
|
|
44
|
+
ported_inject_openai_api()
|
|
45
|
+
# Addresses the asyncio "event loop is closed" issue on Windows
|
|
46
|
+
set_event_loop_policy()
|
|
47
|
+
|
|
35
48
|
def __exit__(
|
|
36
49
|
self,
|
|
37
50
|
exc_type: Optional[Type[BaseException]],
|
|
@@ -44,3 +57,6 @@ class TargetRunContext:
|
|
|
44
57
|
os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
|
|
45
58
|
|
|
46
59
|
os.environ.pop(PF_DISABLE_TRACING, None)
|
|
60
|
+
|
|
61
|
+
if isinstance(self._client, RunSubmitterClient):
|
|
62
|
+
ported_recover_openai_api()
|
|
@@ -295,7 +295,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
295
295
|
return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"
|
|
296
296
|
|
|
297
297
|
def _get_token(self) -> str:
|
|
298
|
-
return self._management_client.get_token()
|
|
298
|
+
return self._management_client.get_token().token
|
|
299
299
|
|
|
300
300
|
def request_with_retry(
|
|
301
301
|
self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None
|