azure-ai-evaluation 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +1 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +1 -1
- azure/ai/evaluation/_aoai/label_grader.py +2 -2
- azure/ai/evaluation/_aoai/string_check_grader.py +2 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +2 -2
- azure/ai/evaluation/_common/__init__.py +3 -1
- azure/ai/evaluation/_common/evaluation_onedp_client.py +50 -5
- azure/ai/evaluation/_common/onedp/operations/_operations.py +1 -1
- azure/ai/evaluation/_common/rai_service.py +7 -6
- azure/ai/evaluation/_converters/_ai_services.py +162 -118
- azure/ai/evaluation/_converters/_models.py +76 -6
- azure/ai/evaluation/_eval_mapping.py +2 -0
- azure/ai/evaluation/_evaluate/_evaluate.py +11 -13
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +24 -5
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +4 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +31 -29
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +10 -0
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +10 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +10 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +13 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +10 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +80 -10
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +26 -7
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +183 -128
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +3 -3
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +3 -3
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +2 -0
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +6 -5
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +26 -3
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +55 -55
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/__init__.py
CHANGED
@@ -31,6 +31,7 @@ from ._evaluators._xpia import IndirectAttackEvaluator
 from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
 from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
 from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
+from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
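The practical effect of this one-line change is that DocumentRetrievalEvaluator is now exported from the package root. A minimal sketch confirming the new import path; nothing about the evaluator's constructor is shown in this diff, so nothing else is assumed:

    # The import path below is confirmed by this diff; the evaluator's own API is not assumed.
    from azure.ai.evaluation import DocumentRetrievalEvaluator

    print(DocumentRetrievalEvaluator.__name__)  # -> "DocumentRetrievalEvaluator"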
azure/ai/evaluation/_aoai/aoai_grader.py
CHANGED
@@ -77,7 +77,7 @@ class AzureOpenAIGrader():
         return AzureOpenAI(
             azure_endpoint=self._model_config["azure_endpoint"],
             api_key=self._model_config.get("api_key", None),  # Default-style access to appease linters.
-            api_version=
+            api_version=DEFAULT_AOAI_API_VERSION,  # Force a known working version
             azure_deployment=self._model_config.get("azure_deployment", ""),
         )
         from openai import OpenAI
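The grader now pins the Azure OpenAI client to DEFAULT_AOAI_API_VERSION instead of reading an api_version out of the model configuration. A minimal sketch of the equivalent client construction; the endpoint, key, deployment, and the concrete version string are placeholders, not values taken from this diff:

    from openai import AzureOpenAI

    # Placeholder for the constant the SDK now forces; the real value ships inside azure-ai-evaluation.
    DEFAULT_AOAI_API_VERSION = "<pinned-api-version>"

    client = AzureOpenAI(
        azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
        api_key="<api-key>",                                        # placeholder
        api_version=DEFAULT_AOAI_API_VERSION,   # forced to a known working version, per this change
        azure_deployment="<deployment-name>",   # placeholder
    )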
azure/ai/evaluation/_aoai/label_grader.py
CHANGED
@@ -4,7 +4,7 @@
 from typing import Any, Dict, Union, List

 from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
-from openai.types.
+from openai.types.graders import LabelModelGrader
 from azure.ai.evaluation._common._experimental import experimental

 from .aoai_grader import AzureOpenAIGrader
@@ -55,7 +55,7 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
         passing_labels: List[str],
         **kwargs: Any
     ):
-        grader =
+        grader = LabelModelGrader(
             input=input,
             labels=labels,
             model=model,
azure/ai/evaluation/_aoai/string_check_grader.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Dict, Union
 from typing_extensions import Literal

 from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
-from openai.types.
+from openai.types.graders import StringCheckGrader
 from azure.ai.evaluation._common._experimental import experimental

 from .aoai_grader import AzureOpenAIGrader
@@ -55,7 +55,7 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
         reference: str,
         **kwargs: Any
     ):
-        grader =
+        grader = StringCheckGrader(
             input=input,
             name=name,
             operation=operation,
azure/ai/evaluation/_aoai/text_similarity_grader.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Dict, Union
 from typing_extensions import Literal

 from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
-from openai.types.
+from openai.types.graders import TextSimilarityGrader
 from azure.ai.evaluation._common._experimental import experimental

 from .aoai_grader import AzureOpenAIGrader
@@ -77,7 +77,7 @@ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
         name: str,
         **kwargs: Any
     ):
-        grader =
+        grader = TextSimilarityGrader(
             evaluation_metric=evaluation_metric,
             input=input,
             pass_threshold=pass_threshold,
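All three wrappers now import their grader models from openai.types.graders, a module that is only present in newer openai SDK releases, so 1.7.0 effectively requires a newer openai package at runtime. A hedged sketch of building one wrapper through the azure-ai-evaluation surface; the model configuration values and template references are placeholders:

    from azure.ai.evaluation import AzureOpenAIStringCheckGrader

    model_config = {
        "azure_endpoint": "https://<your-resource>.openai.azure.com",  # placeholder
        "api_key": "<api-key>",                                        # placeholder
        "azure_deployment": "<deployment-name>",                       # placeholder
    }

    # Internally this now wraps an openai.types.graders.StringCheckGrader instance.
    grader = AzureOpenAIStringCheckGrader(
        model_config=model_config,
        input="{{item.response}}",          # placeholder template reference
        name="exact-match",
        operation="eq",
        reference="{{item.ground_truth}}",  # placeholder template reference
    )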
azure/ai/evaluation/_common/__init__.py
CHANGED
@@ -9,7 +9,7 @@ from . import constants
 from .rai_service import evaluate_with_rai_service
 from .utils import get_harm_severity_level
 from .evaluation_onedp_client import EvaluationServiceOneDPClient
-from .onedp.models import EvaluationUpload, EvaluationResult
+from .onedp.models import EvaluationUpload, EvaluationResult, RedTeamUpload, ResultType

 __all__ = [
     "get_harm_severity_level",
@@ -18,4 +18,6 @@ __all__ = [
     "EvaluationServiceOneDPClient",
     "EvaluationResult",
     "EvaluationUpload",
+    "RedTeamUpload",
+    "ResultType",
 ]
azure/ai/evaluation/_common/evaluation_onedp_client.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Union, Any, Dict
 from azure.core.credentials import AzureKeyCredential, TokenCredential
 from azure.ai.evaluation._common.onedp import AIProjectClient as RestEvaluationServiceClient
 from azure.ai.evaluation._common.onedp.models import (PendingUploadRequest, PendingUploadType, EvaluationResult,
-                                                      ResultType, AssetCredentialRequest, EvaluationUpload, InputDataset)
+                                                      ResultType, AssetCredentialRequest, EvaluationUpload, InputDataset, RedTeamUpload)
 from azure.storage.blob import ContainerClient
 from .utils import upload

@@ -22,7 +22,8 @@ class EvaluationServiceOneDPClient:
             **kwargs,
         )

-    def create_evaluation_result(
+    def create_evaluation_result(
+            self, *, name: str, path: str, version=1, metrics: Dict[str, int]=None, result_type: ResultType=ResultType.EVALUATION, **kwargs) -> EvaluationResult:
         """Create and upload evaluation results to Azure evaluation service.

         This method uploads evaluation results from a local path to Azure Blob Storage
@@ -39,14 +40,16 @@ class EvaluationServiceOneDPClient:
         :param version: The version number for the evaluation results, defaults to 1
         :type version: int, optional
         :param metrics: Metrics to be added to evaluation result
-        :type
+        :type metrics: Dict[str, int], optional
+        :param result_type: Evaluation Result Type to create
+        :type result_type: ResultType, optional
         :param kwargs: Additional keyword arguments to pass to the underlying API calls
         :return: The response from creating the evaluation result version
         :rtype: EvaluationResult
         :raises: Various exceptions from the underlying API calls or upload process
         """

-        LOGGER.debug(f"Creating evaluation result for {name} with version {version} from path {path}")
+        LOGGER.debug(f"Creating evaluation result for {name} with version {version} type {result_type} from path {path}")
         start_pending_upload_response = self.rest_client.evaluation_results.start_pending_upload(
             name=name,
             version=version,
@@ -63,7 +66,7 @@ class EvaluationServiceOneDPClient:
         create_version_response = self.rest_client.evaluation_results.create_or_update_version(
             body=EvaluationResult(
                 blob_uri=start_pending_upload_response.blob_reference_for_consumption.blob_uri,
-                result_type=
+                result_type=result_type,
                 name=name,
                 version=version,
                 metrics=metrics,
@@ -115,4 +118,46 @@ class EvaluationServiceOneDPClient:
             **kwargs
         )

+        return update_run_response
+
+    def start_red_team_run(self, *, red_team: RedTeamUpload, **kwargs):
+        """Start a new red team run in the Azure evaluation service.
+
+        This method creates a new red team run with the provided configuration details.
+
+        :param red_team: The red team configuration to upload
+        :type red_team: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
+        :param kwargs: Additional keyword arguments to pass to the underlying API calls
+        :return: The created red team run object
+        :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
+        :raises: Various exceptions from the underlying API calls
+        """
+        upload_run_response = self.rest_client.red_teams.upload_run(
+            redteam=red_team,
+            **kwargs
+        )
+
+        return upload_run_response
+
+    def update_red_team_run(self, *, name: str, red_team: RedTeamUpload, **kwargs):
+        """Update an existing red team run in the Azure evaluation service.
+
+        This method updates a red team run with new information such as status changes,
+        result references, or other metadata.
+
+        :param name: The identifier of the red team run to update
+        :type name: str
+        :param red_team: The updated red team configuration
+        :type red_team: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
+        :param kwargs: Additional keyword arguments to pass to the underlying API calls
+        :return: The updated red team run object
+        :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
+        :raises: Various exceptions from the underlying API calls
+        """
+        update_run_response = self.rest_client.red_teams.upload_update_run(
+            name=name,
+            redteam=red_team,
+            **kwargs
+        )
+
         return update_run_response
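Taken together with the _common/__init__.py change, create_evaluation_result now accepts a result_type (defaulting to ResultType.EVALUATION) and the client gains start_red_team_run/update_red_team_run wrappers over the red_teams operations. A hedged usage sketch; the client constructor arguments and the RedTeamUpload fields are assumptions, since neither appears in this diff:

    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation._common import EvaluationServiceOneDPClient, ResultType
    from azure.ai.evaluation._common.onedp.models import RedTeamUpload

    # Constructor arguments are assumed; the diff only shows the new methods.
    client = EvaluationServiceOneDPClient(
        endpoint="https://<your-project-endpoint>",  # placeholder
        credential=DefaultAzureCredential(),
    )

    # result_type is new in 1.7.0 and defaults to ResultType.EVALUATION.
    result = client.create_evaluation_result(
        name="my-eval-result",
        path="./results",          # local folder to upload
        metrics={"passed": 1},
        result_type=ResultType.EVALUATION,
    )

    # New in 1.7.0: upload a red team run. The RedTeamUpload field shown here is hypothetical.
    run = client.start_red_team_run(red_team=RedTeamUpload(display_name="demo-scan"))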
azure/ai/evaluation/_common/onedp/operations/_operations.py
CHANGED
@@ -4267,7 +4267,7 @@ class RedTeamsOperations:
         if isinstance(redteam, (IOBase, bytes)):
             _content = redteam
         else:
-            _content = json.dumps(redteam, cls=SdkJSONEncoder, exclude_readonly=
+            _content = json.dumps(redteam, cls=SdkJSONEncoder, exclude_readonly=False)  # type: ignore

         _request = build_red_teams_upload_update_run_request(
             name=name,
azure/ai/evaluation/_common/rai_service.py
CHANGED
@@ -629,8 +629,9 @@ async def evaluate_with_rai_service(
     :type data: dict
     :param metric_name: The evaluation metric to use.
     :type metric_name: str
-    :param project_scope: The Azure AI project
-
+    :param project_scope: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type project_scope: Union[str, AzureAIProject]
     :param credential: The Azure authentication credential.
     :type credential: ~azure.core.credentials.TokenCredential
     :param annotation_task: The annotation task to use.
@@ -777,11 +778,11 @@ async def evaluate_with_rai_service_multimodal(
     :type messages: str
     :param metric_name: The evaluation metric to use.
     :type metric_name: str
-    :param project_scope: The Azure AI project
-
+    :param project_scope: The Azure AI project, which can either be a string representing the project endpoint
+        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
+    :type project_scope: Union[str, AzureAIProject]
     :param credential: The Azure authentication credential.
-    :type credential:
-        ~azure.core.credentials.TokenCredential
+    :type credential: ~azure.core.credentials.TokenCredential
     :return: The parsed annotation result.
     :rtype: List[List[Dict]]
     """
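The updated docstrings reflect that project_scope may now be either a project endpoint string or an AzureAIProject mapping. A small sketch of the two accepted shapes; the concrete values are placeholders:

    from azure.ai.evaluation import AzureAIProject

    # Shape 1: the project endpoint as a plain string (placeholder URL).
    project_scope_endpoint = "https://<your-resource>.services.ai.azure.com/api/projects/<project-name>"

    # Shape 2: an AzureAIProject mapping with subscription, resource group, and project name.
    project_scope_mapping: AzureAIProject = {
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    }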
azure/ai/evaluation/_converters/_ai_services.py
CHANGED
@@ -1,21 +1,17 @@
 import json
+from abc import abstractmethod
 from concurrent.futures import ThreadPoolExecutor, as_completed

+from azure.ai.projects import __version__ as projects_version
 from azure.ai.projects import AIProjectClient
-from azure.ai.projects.models import (
-    ThreadRun,
-    RunStep,
-    RunStepToolCallDetails,
-    FunctionDefinition,
-    ListSortOrder,
-)

 from typing import List, Union

 from azure.ai.evaluation._common._experimental import experimental
+from packaging.version import Version

 # Constants.
-from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION
+from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION, _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS

 # Message instances.
 from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall
@@ -26,61 +22,20 @@ from ._models import ToolDefinition, EvaluatorData
 # Utilities.
 from ._models import break_tool_call_into_messages, convert_message

-# Maximum items to fetch in a single AI Services API call (imposed by the service).
-_AI_SERVICES_API_MAX_LIMIT = 100
-
-# Maximum number of workers allowed to make API calls at the same time.
-_MAX_WORKERS = 10
-
-# Constants to only be used internally in this file for the built-in tools.
-_CODE_INTERPRETER = "code_interpreter"
-_BING_GROUNDING = "bing_grounding"
-_FILE_SEARCH = "file_search"
-
-# Built-in tool descriptions and parameters are hidden, but we include basic descriptions
-# for evaluation purposes.
-_BUILT_IN_DESCRIPTIONS = {
-    _CODE_INTERPRETER: "Use code interpreter to read and interpret information from datasets, "
-    + "generate code, and create graphs and charts using your data. Supports "
-    + "up to 20 files.",
-    _BING_GROUNDING: "Enhance model output with web data.",
-    _FILE_SEARCH: "Search for data across uploaded files.",
-}
-
-# Built-in tool parameters are hidden, but we include basic parameters for evaluation purposes.
-_BUILT_IN_PARAMS = {
-    _CODE_INTERPRETER: {
-        "type": "object",
-        "properties": {"input": {"type": "string", "description": "Generated code to be executed."}},
-    },
-    _BING_GROUNDING: {
-        "type": "object",
-        "properties": {"requesturl": {"type": "string", "description": "URL used in Bing Search API."}},
-    },
-    _FILE_SEARCH: {
-        "type": "object",
-        "properties": {
-            "ranking_options": {
-                "type": "object",
-                "properties": {
-                    "ranker": {"type": "string", "description": "Ranking algorithm to use."},
-                    "score_threshold": {"type": "number", "description": "Threshold for search results."},
-                },
-                "description": "Ranking options for search results.",
-            }
-        },
-    },
-}

 @experimental
 class AIAgentConverter:
     """
-    A converter for AI agent data.
+    A converter for AI agent data. Data retrieval classes handle getting agent data depending on
+    agent version.

     :param project_client: The AI project client used for API interactions.
     :type project_client: AIProjectClient
     """

+    # Maximum number of workers allowed to make API calls at the same time.
+    _MAX_WORKERS = 10
+
     def __init__(self, project_client: AIProjectClient):
         """
         Initializes the AIAgentConverter with the given AI project client.
@@ -89,30 +44,16 @@ class AIAgentConverter:
         :type project_client: AIProjectClient
         """
         self.project_client = project_client
+        self._data_retriever = AIAgentConverter._get_data_retriever(project_client=project_client)

-
-
-
-
-
-
-    :
-
-        to_return = []
-
-        has_more = True
-        after = None
-        while has_more:
-            messages = self.project_client.agents.list_messages(
-                thread_id=thread_id, limit=_AI_SERVICES_API_MAX_LIMIT, order=ListSortOrder.ASCENDING, after=after
-            )
-            has_more = messages.has_more
-            after = messages.last_id
-            if messages.data:
-                # We need to add the messages to the accumulator.
-                to_return.extend(messages.data)
-
-        return to_return
+    @staticmethod
+    def _get_data_retriever(project_client: AIProjectClient):
+        if project_client is None:
+            return None
+        if Version(projects_version) > Version("1.0.0b10"):
+            return FDPAgentDataRetriever(project_client=project_client)
+        else:
+            return LegacyAgentDataRetriever(project_client=project_client)

     def _list_tool_calls_chronological(self, thread_id: str, run_id: str) -> List[ToolCall]:
         """
@@ -127,29 +68,14 @@ class AIAgentConverter:
         """
         # This is the other API request that we need to make to AI service, such that we can get the details about
         # the tool calls and results. Since the list is given in reverse chronological order, we need to reverse it.
-        run_steps_chronological
-        has_more = True
-        after = None
-        while has_more:
-            run_steps = self.project_client.agents.list_run_steps(
-                thread_id=thread_id,
-                run_id=run_id,
-                limit=_AI_SERVICES_API_MAX_LIMIT,
-                order=ListSortOrder.ASCENDING,
-                after=after,
-            )
-            has_more = run_steps.has_more
-            after = run_steps.last_id
-            if run_steps.data:
-                # We need to add the run steps to the accumulator.
-                run_steps_chronological.extend(run_steps.data)
+        run_steps_chronological = self._data_retriever._list_run_steps_chronological(thread_id=thread_id, run_id=run_id)

         # Let's accumulate the function calls in chronological order. Function calls
         tool_calls_chronological: List[ToolCall] = []
         for run_step_chronological in run_steps_chronological:
             if run_step_chronological.type != _TOOL_CALLS:
                 continue
-            step_details:
+            step_details: object = run_step_chronological.step_details
             if step_details.type != _TOOL_CALLS:
                 continue
             if len(step_details.tool_calls) < 1:
@@ -166,26 +92,13 @@ class AIAgentConverter:

         return tool_calls_chronological

-    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
-        """
-        Lists run IDs in chronological order for a given thread.
-
-        :param thread_id: The ID of the thread.
-        :type thread_id: str
-        :return: A list of run IDs in chronological order.
-        :rtype: List[str]
-        """
-        runs = self.project_client.agents.list_runs(thread_id=thread_id, order=ListSortOrder.ASCENDING)
-        run_ids = [run["id"] for run in runs["data"]]
-        return run_ids
-
     @staticmethod
-    def _extract_function_tool_definitions(thread_run:
+    def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinition]:
         """
         Extracts tool definitions from a thread run.

         :param thread_run: The thread run containing tool definitions.
-        :type thread_run:
+        :type thread_run: object
         :return: A list of tool definitions extracted from the thread run.
         :rtype: List[ToolDefinition]
         """
@@ -202,6 +115,7 @@ class AIAgentConverter:

             final_tools.append(
                 ToolDefinition(
+                    type="function",
                     name=tool_function.name,
                     description=tool_function.description,
                     parameters=parameters,
@@ -213,6 +127,7 @@ class AIAgentConverter:
             if tool.type in _BUILT_IN_DESCRIPTIONS and tool.type in _BUILT_IN_PARAMS:
                 final_tools.append(
                     ToolDefinition(
+                        type=tool.type,
                         name=tool.type,
                         description=_BUILT_IN_DESCRIPTIONS[tool.type],
                         parameters=_BUILT_IN_PARAMS[tool.type],
@@ -406,12 +321,12 @@ class AIAgentConverter:
         # We set the include_run_id to False, since we don't want to include the current run's tool calls, which
         # are already included in the previous step.
         run_ids_up_to_run_id = AIAgentConverter._filter_run_ids_up_to_run_id(
-            self._list_run_ids_chronological(thread_id), run_id, include_run_id=False
+            self._data_retriever._list_run_ids_chronological(thread_id), run_id, include_run_id=False
         )

         # Since each _list_tool_calls_chronological call is expensive, we can use a thread pool to speed
         # up the process by parallelizing the AI Services API requests.
-        with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
+        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
             futures = {
                 executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id
                 for run_id in run_ids_up_to_run_id
@@ -437,7 +352,7 @@ class AIAgentConverter:
         """
         to_return: List[Message] = []

-        with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
+        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
             futures = {executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id for run_id in run_ids}
             for future in as_completed(futures):
                 to_return.extend(future.result())
@@ -498,10 +413,10 @@ class AIAgentConverter:
         :rtype: dict
         """
         # Make the API call once and reuse the result.
-        thread_run:
+        thread_run: object = self._data_retriever._get_run(thread_id=thread_id, run_id=run_id)

         # Walk through the "user-facing" conversation history and start adding messages.
-        chronological_conversation = self._list_messages_chronological(thread_id)
+        chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)

         # Since this is Xth run of out possibly N runs, we are only interested is messages that are before the run X.
         chrono_until_run_id = AIAgentConverter._filter_messages_up_to_run_id(chronological_conversation, run_id)
@@ -557,14 +472,14 @@ class AIAgentConverter:
         list_of_run_evaluations: List[dict] = []

         # These are all the run IDs.
-        run_ids = self._list_run_ids_chronological(thread_id)
+        run_ids = self._data_retriever._list_run_ids_chronological(thread_id)

         # If there were no messages in the thread, we can return an empty list.
         if len(run_ids) < 1:
             return list_of_run_evaluations

         # These are all the messages.
-        chronological_conversation = self._list_messages_chronological(thread_id)
+        chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)

         # If there are no messages in the thread, we can return an empty list.
         if len(chronological_conversation) < 1:
@@ -574,7 +489,7 @@ class AIAgentConverter:
         all_sorted_tool_calls = AIAgentConverter._sort_messages(self._retrieve_all_tool_calls(thread_id, run_ids))

         # The last run should have all the tool definitions.
-        thread_run = self.
+        thread_run = self._data_retriever._get_run(thread_id=thread_id, run_id=run_ids[-1])
         instructions = thread_run.instructions

         # So then we can get the tool definitions.
@@ -647,7 +562,7 @@ class AIAgentConverter:
             return self._prepare_single_thread_evaluation_data(thread_id=thread_ids, filename=filename)

         evaluations = []
-        with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
+        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
             # We override the filename, because we don't want to write the file for each thread, having to handle
             # threading issues and file being opened from multiple threads, instead, we just want to write it once
             # at the end.
@@ -802,3 +717,132 @@ class AIAgentConverter:
             data = json.load(file)

         return AIAgentConverter._convert_from_conversation(data, run_id)
+
+@experimental
+class AIAgentDataRetriever:
+    # Maximum items to fetch in a single AI Services API call (imposed by the service).
+    _AI_SERVICES_API_MAX_LIMIT = 100
+
+    def __init__(self, project_client: AIProjectClient):
+        """
+        Initializes the AIAgentDataRetriever with the given AI project client.
+
+        :param project_client: The AI project client used for API interactions.
+        :type project_client: AIProjectClient
+        """
+        self.project_client = project_client
+
+    @abstractmethod
+    def _get_run(self, thread_id: str, run_id: str):
+        pass
+
+    @abstractmethod
+    def _list_messages_chronological(self, thread_id: str):
+        pass
+
+    @abstractmethod
+    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
+        pass
+
+    @abstractmethod
+    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
+        pass
+
+@experimental
+class LegacyAgentDataRetriever(AIAgentDataRetriever):
+
+    def __init__(self, **kwargs):
+        super(LegacyAgentDataRetriever, self).__init__(**kwargs)
+
+    def _list_messages_chronological(self, thread_id: str):
+        """
+        Lists messages in chronological order for a given thread.
+
+        :param thread_id: The ID of the thread.
+        :type thread_id: str
+        :return: A list of messages in chronological order.
+        """
+        to_return = []
+
+        has_more = True
+        after = None
+        while has_more:
+            messages = self.project_client.agents.list_messages(
+                thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc", after=after)
+            has_more = messages.has_more
+            after = messages.last_id
+            if messages.data:
+                # We need to add the messages to the accumulator.
+                to_return.extend(messages.data)
+
+        return to_return
+
+    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
+        run_steps_chronological: List[object] = []
+        has_more = True
+        after = None
+        while has_more:
+            run_steps = self.project_client.agents.list_run_steps(
+                thread_id=thread_id,
+                run_id=run_id,
+                limit=self._AI_SERVICES_API_MAX_LIMIT,
+                order="asc",
+                after=after,
+            )
+            has_more = run_steps.has_more
+            after = run_steps.last_id
+            if run_steps.data:
+                # We need to add the run steps to the accumulator.
+                run_steps_chronological.extend(run_steps.data)
+        return run_steps_chronological
+
+    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
+        """
+        Lists run IDs in chronological order for a given thread.
+
+        :param thread_id: The ID of the thread.
+        :type thread_id: str
+        :return: A list of run IDs in chronological order.
+        :rtype: List[str]
+        """
+        runs = self.project_client.agents.list_runs(thread_id=thread_id, order="asc")
+        run_ids = [run["id"] for run in runs["data"]]
+        return run_ids
+
+    def _get_run(self, thread_id: str, run_id: str):
+        return self.project_client.agents.get_run(thread_id=thread_id, run_id=run_id)
+
+@experimental
+class FDPAgentDataRetriever(AIAgentDataRetriever):
+
+    def __init__(self, **kwargs):
+        super(FDPAgentDataRetriever, self).__init__(**kwargs)
+
+    def _list_messages_chronological(self, thread_id: str):
+        """
+        Lists messages in chronological order for a given thread.
+
+        :param thread_id: The ID of the thread.
+        :type thread_id: str
+        :return: A list of messages in chronological order.
+        """
+        message_iter = self.project_client.agents.messages.list(
+            thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc"
+        )
+        return [message for message in message_iter]

+    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
+
+        return self.project_client.agents.run_steps.list(
+            thread_id=thread_id,
+            run_id=run_id,
+            limit=self._AI_SERVICES_API_MAX_LIMIT,
+            order="asc"
+        )
+
+    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
+        runs = self.project_client.agents.runs.list(thread_id=thread_id, order="asc")
+        return [run.id for run in runs]
+
+    def _get_run(self, thread_id: str, run_id: str):
+        return self.project_client.agents.runs.get(thread_id=thread_id, run_id=run_id)