azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +10 -0
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
- azure/ai/evaluation/_aoai/label_grader.py +66 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
- azure/ai/evaluation/_azure/_clients.py +4 -4
- azure/ai/evaluation/_azure/_envs.py +208 -0
- azure/ai/evaluation/_azure/_token_manager.py +12 -7
- azure/ai/evaluation/_common/__init__.py +7 -0
- azure/ai/evaluation/_common/evaluation_onedp_client.py +163 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +139 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +165 -34
- azure/ai/evaluation/_common/raiclient/_version.py +1 -1
- azure/ai/evaluation/_common/utils.py +79 -1
- azure/ai/evaluation/_constants.py +16 -0
- azure/ai/evaluation/_converters/_ai_services.py +162 -118
- azure/ai/evaluation/_converters/_models.py +76 -6
- azure/ai/evaluation/_eval_mapping.py +73 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +325 -76
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +553 -0
- azure/ai/evaluation/_evaluate/_utils.py +117 -4
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +12 -3
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +469 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +11 -1
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -1
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +16 -2
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +11 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -2
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +11 -1
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +16 -2
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +86 -12
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
- azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +114 -22
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +976 -546
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
- azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +38 -25
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +43 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +26 -18
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +15 -10
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +49 -3
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +144 -86
- /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
-
|
|
4
|
+
import os
|
|
5
|
+
import posixpath
|
|
5
6
|
import re
|
|
6
7
|
import math
|
|
7
8
|
import threading
|
|
8
9
|
from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cast, get_args, get_origin
|
|
9
10
|
|
|
10
11
|
import nltk
|
|
12
|
+
from azure.storage.blob import ContainerClient
|
|
11
13
|
from typing_extensions import NotRequired, Required, TypeGuard
|
|
12
14
|
from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
|
|
13
15
|
from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
|
|
@@ -124,10 +126,25 @@ def construct_prompty_model_config(
|
|
|
124
126
|
|
|
125
127
|
return prompty_model_config
|
|
126
128
|
|
|
129
|
+
def is_onedp_project(azure_ai_project: AzureAIProject) -> bool:
    """Tell whether the given project reference denotes a OneDP project.

    The decision is purely type-based: a project supplied as a string is
    treated as a OneDP project, anything else is not.

    :param azure_ai_project: The scope of the Azure AI project.
    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
    :return: True if ``azure_ai_project`` is a ``str``, False otherwise.
    :rtype: bool
    """
    return isinstance(azure_ai_project, str)
|
|
127
140
|
|
|
128
141
|
def validate_azure_ai_project(o: object) -> AzureAIProject:
|
|
129
142
|
fields = {"subscription_id": str, "resource_group_name": str, "project_name": str}
|
|
130
143
|
|
|
144
|
+
# TODO: Add a regex check for malformed project URIs.
|
|
145
|
+
if is_onedp_project(o):
|
|
146
|
+
return o
|
|
147
|
+
|
|
131
148
|
if not isinstance(o, dict):
|
|
132
149
|
msg = "The 'azure_ai_project' parameter must be a dictionary."
|
|
133
150
|
raise EvaluationException(
|
|
@@ -463,3 +480,64 @@ def validate_conversation(conversation):
|
|
|
463
480
|
"User and assistant role expected as the only role in each message.",
|
|
464
481
|
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
465
482
|
)
|
|
483
|
+
|
|
484
|
+
def upload(path: str, container_client: ContainerClient, logger=None):
    """Upload a file, or every file under a directory, to Azure Blob Storage.

    When ``path`` is a directory it is walked recursively and each file is
    uploaded under a blob name equal to its path relative to ``path``, always
    using ``/`` as the separator regardless of the local operating system.
    When ``path`` is a single file it is uploaded under its base name.

    :param path: The local path to a file or directory to upload
    :type path: str
    :param container_client: The Azure Blob Container client to use for uploading
    :type container_client: azure.storage.blob.ContainerClient
    :param logger: Optional logger for debug output, defaults to None
    :type logger: logging.Logger, optional
    :raises EvaluationException: If the path doesn't exist or errors occur during upload
    """
    is_directory = os.path.isdir(path)
    if not is_directory and not os.path.isfile(path):
        raise EvaluationException(
            message=f"Path '{path}' is not a directory or a file",
            internal_message=f"Path '{path}' is not a directory or a file",
            target=ErrorTarget.RAI_CLIENT,
            category=ErrorCategory.INVALID_VALUE,
            blame=ErrorBlame.SYSTEM_ERROR,
        )

    remote_paths = []
    local_paths = []

    if is_directory:
        for root, _, filenames in os.walk(path):
            upload_path = ""
            if root != path:
                rel_path = os.path.relpath(root, path)
                # relpath() uses the local separator; blob names must use "/",
                # so convert explicitly (a no-op on POSIX systems).
                upload_path = rel_path.replace(os.sep, posixpath.sep)
            for f in filenames:
                remote_paths.append(posixpath.join(upload_path, f))
                local_paths.append(os.path.join(root, f))
    else:
        remote_paths = [os.path.basename(path)]
        local_paths = [path]

    try:
        for local, remote in zip(local_paths, remote_paths):
            # Open the file in binary read mode and stream it to the container.
            with open(local, "rb") as data:
                container_client.upload_blob(data=data, name=remote)
            if logger:
                logger.debug(f"File '{local}' uploaded successfully")

    except Exception as e:
        # Any failure (local I/O or service error) is surfaced as a single
        # EvaluationException; the original exception is kept as the cause.
        raise EvaluationException(
            message=f"Error uploading file: {e}",
            internal_message=f"Error uploading file: {e}",
            target=ErrorTarget.RAI_CLIENT,
            category=ErrorCategory.UPLOAD_ERROR,
            blame=ErrorBlame.SYSTEM_ERROR,
        ) from e
|
|
@@ -62,6 +62,8 @@ class EvaluationRunProperties:
|
|
|
62
62
|
RUN_TYPE = "runType"
|
|
63
63
|
EVALUATION_RUN = "_azureml.evaluation_run"
|
|
64
64
|
EVALUATION_SDK = "_azureml.evaluation_sdk_name"
|
|
65
|
+
NAME_MAP = "_azureml.evaluation_name_map"
|
|
66
|
+
NAME_MAP_LENGTH = "_azureml.evaluation_name_map_length"
|
|
65
67
|
|
|
66
68
|
|
|
67
69
|
@experimental
|
|
@@ -79,6 +81,13 @@ class _AggregationType(enum.Enum):
|
|
|
79
81
|
SUM = "sum"
|
|
80
82
|
CUSTOM = "custom"
|
|
81
83
|
|
|
84
|
+
class TokenScope(str, enum.Enum):
    """Token scopes that can be requested when accessing Azure resources.

    Because the enum also derives from ``str``, each member compares equal to
    its scope string and can be passed wherever a plain string is expected.
    """

    # Scope string for the management.azure.com endpoint.
    DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
    # Scope string for the ai.azure.com endpoint.
    COGNITIVE_SERVICES_MANAGEMENT = "https://ai.azure.com/.default"
    # Scope string for the ml.azure.com endpoint.
    AZURE_ML = "https://ml.azure.com/.default"
|
|
90
|
+
|
|
82
91
|
|
|
83
92
|
DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
|
|
84
93
|
|
|
@@ -99,3 +108,10 @@ EVALUATION_PASS_FAIL_MAPPING = {
|
|
|
99
108
|
True: "pass",
|
|
100
109
|
False: "fail",
|
|
101
110
|
}
|
|
111
|
+
|
|
112
|
+
DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS = 60000
|
|
113
|
+
BINARY_AGGREGATE_SUFFIX = "binary_aggregate"
|
|
114
|
+
|
|
115
|
+
AOAI_COLUMN_NAME = "aoai"
|
|
116
|
+
DEFAULT_OAI_EVAL_RUN_NAME = "AI_SDK_EVAL_RUN"
|
|
117
|
+
DEFAULT_AOAI_API_VERSION = "2025-04-01-preview" # Unfortunately relying on preview version for now.
|
|
@@ -1,21 +1,17 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from abc import abstractmethod
|
|
2
3
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
3
4
|
|
|
5
|
+
from azure.ai.projects import __version__ as projects_version
|
|
4
6
|
from azure.ai.projects import AIProjectClient
|
|
5
|
-
from azure.ai.projects.models import (
|
|
6
|
-
ThreadRun,
|
|
7
|
-
RunStep,
|
|
8
|
-
RunStepToolCallDetails,
|
|
9
|
-
FunctionDefinition,
|
|
10
|
-
ListSortOrder,
|
|
11
|
-
)
|
|
12
7
|
|
|
13
8
|
from typing import List, Union
|
|
14
9
|
|
|
15
10
|
from azure.ai.evaluation._common._experimental import experimental
|
|
11
|
+
from packaging.version import Version
|
|
16
12
|
|
|
17
13
|
# Constants.
|
|
18
|
-
from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION
|
|
14
|
+
from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION, _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS
|
|
19
15
|
|
|
20
16
|
# Message instances.
|
|
21
17
|
from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall
|
|
@@ -26,61 +22,20 @@ from ._models import ToolDefinition, EvaluatorData
|
|
|
26
22
|
# Utilities.
|
|
27
23
|
from ._models import break_tool_call_into_messages, convert_message
|
|
28
24
|
|
|
29
|
-
# Maximum items to fetch in a single AI Services API call (imposed by the service).
|
|
30
|
-
_AI_SERVICES_API_MAX_LIMIT = 100
|
|
31
|
-
|
|
32
|
-
# Maximum number of workers allowed to make API calls at the same time.
|
|
33
|
-
_MAX_WORKERS = 10
|
|
34
|
-
|
|
35
|
-
# Constants to only be used internally in this file for the built-in tools.
|
|
36
|
-
_CODE_INTERPRETER = "code_interpreter"
|
|
37
|
-
_BING_GROUNDING = "bing_grounding"
|
|
38
|
-
_FILE_SEARCH = "file_search"
|
|
39
|
-
|
|
40
|
-
# Built-in tool descriptions and parameters are hidden, but we include basic descriptions
|
|
41
|
-
# for evaluation purposes.
|
|
42
|
-
_BUILT_IN_DESCRIPTIONS = {
|
|
43
|
-
_CODE_INTERPRETER: "Use code interpreter to read and interpret information from datasets, "
|
|
44
|
-
+ "generate code, and create graphs and charts using your data. Supports "
|
|
45
|
-
+ "up to 20 files.",
|
|
46
|
-
_BING_GROUNDING: "Enhance model output with web data.",
|
|
47
|
-
_FILE_SEARCH: "Search for data across uploaded files.",
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
# Built-in tool parameters are hidden, but we include basic parameters for evaluation purposes.
|
|
51
|
-
_BUILT_IN_PARAMS = {
|
|
52
|
-
_CODE_INTERPRETER: {
|
|
53
|
-
"type": "object",
|
|
54
|
-
"properties": {"input": {"type": "string", "description": "Generated code to be executed."}},
|
|
55
|
-
},
|
|
56
|
-
_BING_GROUNDING: {
|
|
57
|
-
"type": "object",
|
|
58
|
-
"properties": {"requesturl": {"type": "string", "description": "URL used in Bing Search API."}},
|
|
59
|
-
},
|
|
60
|
-
_FILE_SEARCH: {
|
|
61
|
-
"type": "object",
|
|
62
|
-
"properties": {
|
|
63
|
-
"ranking_options": {
|
|
64
|
-
"type": "object",
|
|
65
|
-
"properties": {
|
|
66
|
-
"ranker": {"type": "string", "description": "Ranking algorithm to use."},
|
|
67
|
-
"score_threshold": {"type": "number", "description": "Threshold for search results."},
|
|
68
|
-
},
|
|
69
|
-
"description": "Ranking options for search results.",
|
|
70
|
-
}
|
|
71
|
-
},
|
|
72
|
-
},
|
|
73
|
-
}
|
|
74
25
|
|
|
75
26
|
@experimental
|
|
76
27
|
class AIAgentConverter:
|
|
77
28
|
"""
|
|
78
|
-
A converter for AI agent data.
|
|
29
|
+
A converter for AI agent data. Data retrieval classes handle getting agent data depending on
|
|
30
|
+
agent version.
|
|
79
31
|
|
|
80
32
|
:param project_client: The AI project client used for API interactions.
|
|
81
33
|
:type project_client: AIProjectClient
|
|
82
34
|
"""
|
|
83
35
|
|
|
36
|
+
# Maximum number of workers allowed to make API calls at the same time.
|
|
37
|
+
_MAX_WORKERS = 10
|
|
38
|
+
|
|
84
39
|
def __init__(self, project_client: AIProjectClient):
|
|
85
40
|
"""
|
|
86
41
|
Initializes the AIAgentConverter with the given AI project client.
|
|
@@ -89,30 +44,16 @@ class AIAgentConverter:
|
|
|
89
44
|
:type project_client: AIProjectClient
|
|
90
45
|
"""
|
|
91
46
|
self.project_client = project_client
|
|
47
|
+
self._data_retriever = AIAgentConverter._get_data_retriever(project_client=project_client)
|
|
92
48
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
:
|
|
100
|
-
|
|
101
|
-
to_return = []
|
|
102
|
-
|
|
103
|
-
has_more = True
|
|
104
|
-
after = None
|
|
105
|
-
while has_more:
|
|
106
|
-
messages = self.project_client.agents.list_messages(
|
|
107
|
-
thread_id=thread_id, limit=_AI_SERVICES_API_MAX_LIMIT, order=ListSortOrder.ASCENDING, after=after
|
|
108
|
-
)
|
|
109
|
-
has_more = messages.has_more
|
|
110
|
-
after = messages.last_id
|
|
111
|
-
if messages.data:
|
|
112
|
-
# We need to add the messages to the accumulator.
|
|
113
|
-
to_return.extend(messages.data)
|
|
114
|
-
|
|
115
|
-
return to_return
|
|
49
|
+
@staticmethod
def _get_data_retriever(project_client: AIProjectClient):
    """Pick the data retriever matching the installed ``azure.ai.projects`` version.

    :param project_client: The AI project client used for API interactions.
    :type project_client: AIProjectClient
    :return: ``None`` when no client is given; otherwise an ``FDPAgentDataRetriever``
        for ``azure.ai.projects`` versions newer than ``1.0.0b10`` and a
        ``LegacyAgentDataRetriever`` for that version and older.
    """
    # Without a client there is nothing to retrieve data from.
    if project_client is None:
        return None

    newer_than_b10 = Version(projects_version) > Version("1.0.0b10")
    retriever_cls = FDPAgentDataRetriever if newer_than_b10 else LegacyAgentDataRetriever
    return retriever_cls(project_client=project_client)
|
|
116
57
|
|
|
117
58
|
def _list_tool_calls_chronological(self, thread_id: str, run_id: str) -> List[ToolCall]:
|
|
118
59
|
"""
|
|
@@ -127,29 +68,14 @@ class AIAgentConverter:
|
|
|
127
68
|
"""
|
|
128
69
|
# This is the other API request that we need to make to AI service, such that we can get the details about
|
|
129
70
|
# the tool calls and results. Since the list is given in reverse chronological order, we need to reverse it.
|
|
130
|
-
run_steps_chronological
|
|
131
|
-
has_more = True
|
|
132
|
-
after = None
|
|
133
|
-
while has_more:
|
|
134
|
-
run_steps = self.project_client.agents.list_run_steps(
|
|
135
|
-
thread_id=thread_id,
|
|
136
|
-
run_id=run_id,
|
|
137
|
-
limit=_AI_SERVICES_API_MAX_LIMIT,
|
|
138
|
-
order=ListSortOrder.ASCENDING,
|
|
139
|
-
after=after,
|
|
140
|
-
)
|
|
141
|
-
has_more = run_steps.has_more
|
|
142
|
-
after = run_steps.last_id
|
|
143
|
-
if run_steps.data:
|
|
144
|
-
# We need to add the run steps to the accumulator.
|
|
145
|
-
run_steps_chronological.extend(run_steps.data)
|
|
71
|
+
run_steps_chronological = self._data_retriever._list_run_steps_chronological(thread_id=thread_id, run_id=run_id)
|
|
146
72
|
|
|
147
73
|
# Let's accumulate the function calls in chronological order. Function calls
|
|
148
74
|
tool_calls_chronological: List[ToolCall] = []
|
|
149
75
|
for run_step_chronological in run_steps_chronological:
|
|
150
76
|
if run_step_chronological.type != _TOOL_CALLS:
|
|
151
77
|
continue
|
|
152
|
-
step_details:
|
|
78
|
+
step_details: object = run_step_chronological.step_details
|
|
153
79
|
if step_details.type != _TOOL_CALLS:
|
|
154
80
|
continue
|
|
155
81
|
if len(step_details.tool_calls) < 1:
|
|
@@ -166,26 +92,13 @@ class AIAgentConverter:
|
|
|
166
92
|
|
|
167
93
|
return tool_calls_chronological
|
|
168
94
|
|
|
169
|
-
def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
|
|
170
|
-
"""
|
|
171
|
-
Lists run IDs in chronological order for a given thread.
|
|
172
|
-
|
|
173
|
-
:param thread_id: The ID of the thread.
|
|
174
|
-
:type thread_id: str
|
|
175
|
-
:return: A list of run IDs in chronological order.
|
|
176
|
-
:rtype: List[str]
|
|
177
|
-
"""
|
|
178
|
-
runs = self.project_client.agents.list_runs(thread_id=thread_id, order=ListSortOrder.ASCENDING)
|
|
179
|
-
run_ids = [run["id"] for run in runs["data"]]
|
|
180
|
-
return run_ids
|
|
181
|
-
|
|
182
95
|
@staticmethod
|
|
183
|
-
def _extract_function_tool_definitions(thread_run:
|
|
96
|
+
def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinition]:
|
|
184
97
|
"""
|
|
185
98
|
Extracts tool definitions from a thread run.
|
|
186
99
|
|
|
187
100
|
:param thread_run: The thread run containing tool definitions.
|
|
188
|
-
:type thread_run:
|
|
101
|
+
:type thread_run: object
|
|
189
102
|
:return: A list of tool definitions extracted from the thread run.
|
|
190
103
|
:rtype: List[ToolDefinition]
|
|
191
104
|
"""
|
|
@@ -202,6 +115,7 @@ class AIAgentConverter:
|
|
|
202
115
|
|
|
203
116
|
final_tools.append(
|
|
204
117
|
ToolDefinition(
|
|
118
|
+
type="function",
|
|
205
119
|
name=tool_function.name,
|
|
206
120
|
description=tool_function.description,
|
|
207
121
|
parameters=parameters,
|
|
@@ -213,6 +127,7 @@ class AIAgentConverter:
|
|
|
213
127
|
if tool.type in _BUILT_IN_DESCRIPTIONS and tool.type in _BUILT_IN_PARAMS:
|
|
214
128
|
final_tools.append(
|
|
215
129
|
ToolDefinition(
|
|
130
|
+
type=tool.type,
|
|
216
131
|
name=tool.type,
|
|
217
132
|
description=_BUILT_IN_DESCRIPTIONS[tool.type],
|
|
218
133
|
parameters=_BUILT_IN_PARAMS[tool.type],
|
|
@@ -406,12 +321,12 @@ class AIAgentConverter:
|
|
|
406
321
|
# We set the include_run_id to False, since we don't want to include the current run's tool calls, which
|
|
407
322
|
# are already included in the previous step.
|
|
408
323
|
run_ids_up_to_run_id = AIAgentConverter._filter_run_ids_up_to_run_id(
|
|
409
|
-
self._list_run_ids_chronological(thread_id), run_id, include_run_id=False
|
|
324
|
+
self._data_retriever._list_run_ids_chronological(thread_id), run_id, include_run_id=False
|
|
410
325
|
)
|
|
411
326
|
|
|
412
327
|
# Since each _list_tool_calls_chronological call is expensive, we can use a thread pool to speed
|
|
413
328
|
# up the process by parallelizing the AI Services API requests.
|
|
414
|
-
with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
|
|
329
|
+
with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
|
|
415
330
|
futures = {
|
|
416
331
|
executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id
|
|
417
332
|
for run_id in run_ids_up_to_run_id
|
|
@@ -437,7 +352,7 @@ class AIAgentConverter:
|
|
|
437
352
|
"""
|
|
438
353
|
to_return: List[Message] = []
|
|
439
354
|
|
|
440
|
-
with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
|
|
355
|
+
with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
|
|
441
356
|
futures = {executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id for run_id in run_ids}
|
|
442
357
|
for future in as_completed(futures):
|
|
443
358
|
to_return.extend(future.result())
|
|
@@ -498,10 +413,10 @@ class AIAgentConverter:
|
|
|
498
413
|
:rtype: dict
|
|
499
414
|
"""
|
|
500
415
|
# Make the API call once and reuse the result.
|
|
501
|
-
thread_run:
|
|
416
|
+
thread_run: object = self._data_retriever._get_run(thread_id=thread_id, run_id=run_id)
|
|
502
417
|
|
|
503
418
|
# Walk through the "user-facing" conversation history and start adding messages.
|
|
504
|
-
chronological_conversation = self._list_messages_chronological(thread_id)
|
|
419
|
+
chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)
|
|
505
420
|
|
|
506
421
|
# Since this is the Xth run out of possibly N runs, we are only interested in messages that are before run X.
|
|
507
422
|
chrono_until_run_id = AIAgentConverter._filter_messages_up_to_run_id(chronological_conversation, run_id)
|
|
@@ -557,14 +472,14 @@ class AIAgentConverter:
|
|
|
557
472
|
list_of_run_evaluations: List[dict] = []
|
|
558
473
|
|
|
559
474
|
# These are all the run IDs.
|
|
560
|
-
run_ids = self._list_run_ids_chronological(thread_id)
|
|
475
|
+
run_ids = self._data_retriever._list_run_ids_chronological(thread_id)
|
|
561
476
|
|
|
562
477
|
# If there were no messages in the thread, we can return an empty list.
|
|
563
478
|
if len(run_ids) < 1:
|
|
564
479
|
return list_of_run_evaluations
|
|
565
480
|
|
|
566
481
|
# These are all the messages.
|
|
567
|
-
chronological_conversation = self._list_messages_chronological(thread_id)
|
|
482
|
+
chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)
|
|
568
483
|
|
|
569
484
|
# If there are no messages in the thread, we can return an empty list.
|
|
570
485
|
if len(chronological_conversation) < 1:
|
|
@@ -574,7 +489,7 @@ class AIAgentConverter:
|
|
|
574
489
|
all_sorted_tool_calls = AIAgentConverter._sort_messages(self._retrieve_all_tool_calls(thread_id, run_ids))
|
|
575
490
|
|
|
576
491
|
# The last run should have all the tool definitions.
|
|
577
|
-
thread_run = self.
|
|
492
|
+
thread_run = self._data_retriever._get_run(thread_id=thread_id, run_id=run_ids[-1])
|
|
578
493
|
instructions = thread_run.instructions
|
|
579
494
|
|
|
580
495
|
# So then we can get the tool definitions.
|
|
@@ -647,7 +562,7 @@ class AIAgentConverter:
|
|
|
647
562
|
return self._prepare_single_thread_evaluation_data(thread_id=thread_ids, filename=filename)
|
|
648
563
|
|
|
649
564
|
evaluations = []
|
|
650
|
-
with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
|
|
565
|
+
with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
|
|
651
566
|
# We override the filename, because we don't want to write the file for each thread, having to handle
|
|
652
567
|
# threading issues and file being opened from multiple threads, instead, we just want to write it once
|
|
653
568
|
# at the end.
|
|
@@ -802,3 +717,132 @@ class AIAgentConverter:
|
|
|
802
717
|
data = json.load(file)
|
|
803
718
|
|
|
804
719
|
return AIAgentConverter._convert_from_conversation(data, run_id)
|
|
720
|
+
|
|
721
|
+
@experimental
class AIAgentDataRetriever:
    """Interface for fetching thread, run and message data through an AI project client.

    Concrete subclasses implement the four retrieval methods for a particular
    client API shape.

    NOTE(review): the methods are marked ``@abstractmethod`` but the class does
    not derive from ``abc.ABC``, so instantiation is presumably not blocked when
    a subclass omits one of them -- confirm whether that is intended.
    """

    # Maximum items to fetch in a single AI Services API call (imposed by the service).
    _AI_SERVICES_API_MAX_LIMIT = 100

    def __init__(self, project_client: AIProjectClient):
        """
        Initializes the AIAgentDataRetriever with the given AI project client.

        :param project_client: The AI project client used for API interactions.
        :type project_client: AIProjectClient
        """
        self.project_client = project_client

    @abstractmethod
    def _get_run(self, thread_id: str, run_id: str):
        """Fetch the run ``run_id`` belonging to thread ``thread_id``."""

    @abstractmethod
    def _list_messages_chronological(self, thread_id: str):
        """List the messages of thread ``thread_id`` in chronological order."""

    @abstractmethod
    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
        """List the steps of run ``run_id`` in thread ``thread_id`` in chronological order."""

    @abstractmethod
    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
        """List the IDs of the runs of thread ``thread_id`` in chronological order."""
|
|
750
|
+
|
|
751
|
+
@experimental
class LegacyAgentDataRetriever(AIAgentDataRetriever):
    """Data retriever built on the flat ``project_client.agents.list_*`` / ``get_run`` operations.

    Every list operation is paged manually: pages of at most
    ``_AI_SERVICES_API_MAX_LIMIT`` items are requested in ascending order and
    the ``after`` cursor is advanced until the service reports no more data.
    """

    def __init__(self, **kwargs):
        super(LegacyAgentDataRetriever, self).__init__(**kwargs)

    def _list_messages_chronological(self, thread_id: str):
        """
        Lists messages in chronological order for a given thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of messages in chronological order.
        """
        to_return = []

        has_more = True
        after = None
        while has_more:
            messages = self.project_client.agents.list_messages(
                thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc", after=after
            )
            has_more = messages.has_more
            after = messages.last_id
            if messages.data:
                # We need to add the messages to the accumulator.
                to_return.extend(messages.data)

        return to_return

    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
        """
        Lists the steps of a run in chronological order.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: A list of run steps in chronological order.
        """
        run_steps_chronological: List[object] = []
        has_more = True
        after = None
        while has_more:
            run_steps = self.project_client.agents.list_run_steps(
                thread_id=thread_id,
                run_id=run_id,
                limit=self._AI_SERVICES_API_MAX_LIMIT,
                order="asc",
                after=after,
            )
            has_more = run_steps.has_more
            after = run_steps.last_id
            if run_steps.data:
                # We need to add the run steps to the accumulator.
                run_steps_chronological.extend(run_steps.data)
        return run_steps_chronological

    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
        """
        Lists run IDs in chronological order for a given thread.

        All pages are fetched, so threads with more runs than fit in a single
        response are returned in full.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of run IDs in chronological order.
        :rtype: List[str]
        """
        run_ids: List[str] = []
        after = None
        while True:
            runs = self.project_client.agents.list_runs(
                thread_id=thread_id,
                limit=self._AI_SERVICES_API_MAX_LIMIT,
                order="asc",
                after=after,
            )
            run_ids.extend(run["id"] for run in runs["data"])
            # Mapping-style access keeps plain-dict responses working; a missing
            # "has_more" is treated as "this was the only page".
            after = runs.get("last_id")
            if not runs.get("has_more") or after is None:
                break
        return run_ids

    def _get_run(self, thread_id: str, run_id: str):
        """
        Fetches a single run of a thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: The run as returned by ``project_client.agents.get_run``.
        """
        return self.project_client.agents.get_run(thread_id=thread_id, run_id=run_id)
|
|
814
|
+
|
|
815
|
+
@experimental
class FDPAgentDataRetriever(AIAgentDataRetriever):
    """Data retriever built on the grouped ``agents.messages`` / ``agents.run_steps`` / ``agents.runs`` operations.

    The iterables returned by the client are consumed eagerly, so every list
    method hands back a plain ``list``.
    """

    def __init__(self, **kwargs):
        super(FDPAgentDataRetriever, self).__init__(**kwargs)

    def _list_messages_chronological(self, thread_id: str):
        """
        Lists messages in chronological order for a given thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of messages in chronological order.
        """
        message_iter = self.project_client.agents.messages.list(
            thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc"
        )
        return list(message_iter)

    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
        """
        Lists the steps of a run in chronological order.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: A list of run steps in chronological order.
        """
        run_step_iter = self.project_client.agents.run_steps.list(
            thread_id=thread_id,
            run_id=run_id,
            limit=self._AI_SERVICES_API_MAX_LIMIT,
            order="asc",
        )
        # Materialize so callers get a list, matching the other list methods
        # of this class rather than a one-shot iterable.
        return list(run_step_iter)

    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
        """
        Lists run IDs in chronological order for a given thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of run IDs in chronological order.
        :rtype: List[str]
        """
        runs = self.project_client.agents.runs.list(thread_id=thread_id, order="asc")
        return [run.id for run in runs]

    def _get_run(self, thread_id: str, run_id: str):
        """
        Fetches a single run of a thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: The run as returned by ``project_client.agents.runs.get``.
        """
        return self.project_client.agents.runs.get(thread_id=thread_id, run_id=run_id)
|