azure-ai-evaluation: azure_ai_evaluation-1.6.0-py3-none-any.whl → azure_ai_evaluation-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (69)
  1. azure/ai/evaluation/__init__.py +1 -0
  2. azure/ai/evaluation/_aoai/aoai_grader.py +1 -1
  3. azure/ai/evaluation/_aoai/label_grader.py +2 -2
  4. azure/ai/evaluation/_aoai/string_check_grader.py +2 -2
  5. azure/ai/evaluation/_aoai/text_similarity_grader.py +2 -2
  6. azure/ai/evaluation/_common/__init__.py +3 -1
  7. azure/ai/evaluation/_common/evaluation_onedp_client.py +50 -5
  8. azure/ai/evaluation/_common/onedp/operations/_operations.py +4 -2
  9. azure/ai/evaluation/_common/rai_service.py +7 -6
  10. azure/ai/evaluation/_converters/_ai_services.py +162 -118
  11. azure/ai/evaluation/_converters/_models.py +76 -6
  12. azure/ai/evaluation/_eval_mapping.py +2 -0
  13. azure/ai/evaluation/_evaluate/_evaluate.py +15 -17
  14. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +24 -5
  15. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
  16. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
  17. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
  18. azure/ai/evaluation/_evaluators/_common/_base_eval.py +4 -0
  19. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
  20. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
  21. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
  22. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
  23. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
  24. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +31 -29
  25. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
  26. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +10 -0
  27. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
  28. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +10 -0
  29. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +10 -0
  30. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
  31. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
  32. azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
  33. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +10 -0
  34. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +13 -0
  35. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
  36. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +14 -4
  37. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
  38. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +10 -0
  39. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -0
  40. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +80 -10
  41. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
  42. azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
  43. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +26 -7
  44. azure/ai/evaluation/_version.py +1 -1
  45. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  46. azure/ai/evaluation/red_team/_agent/_agent_functions.py +264 -0
  47. azure/ai/evaluation/red_team/_agent/_agent_tools.py +503 -0
  48. azure/ai/evaluation/red_team/_agent/_agent_utils.py +69 -0
  49. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +237 -0
  50. azure/ai/evaluation/red_team/_attack_strategy.py +2 -0
  51. azure/ai/evaluation/red_team/_red_team.py +572 -207
  52. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +121 -0
  53. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +570 -0
  54. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +108 -0
  55. azure/ai/evaluation/red_team/_utils/constants.py +5 -1
  56. azure/ai/evaluation/red_team/_utils/metric_mapping.py +2 -2
  57. azure/ai/evaluation/red_team/_utils/strategy_utils.py +2 -0
  58. azure/ai/evaluation/simulator/_adversarial_simulator.py +9 -2
  59. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  60. azure/ai/evaluation/simulator/_direct_attack_simulator.py +3 -3
  61. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +3 -3
  62. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +3 -0
  63. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +15 -7
  64. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +6 -5
  65. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/METADATA +35 -3
  66. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/RECORD +69 -61
  67. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/NOTICE.txt +0 -0
  68. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/WHEEL +0 -0
  69. {azure_ai_evaluation-1.6.0.dist-info → azure_ai_evaluation-1.8.0.dist-info}/top_level.txt +0 -0
@@ -31,6 +31,7 @@ from ._evaluators._xpia import IndirectAttackEvaluator
31
31
  from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
32
32
  from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
33
33
  from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
34
+ from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
34
35
  from ._model_configurations import (
35
36
  AzureAIProject,
36
37
  AzureOpenAIModelConfiguration,
@@ -77,7 +77,7 @@ class AzureOpenAIGrader():
77
77
  return AzureOpenAI(
78
78
  azure_endpoint=self._model_config["azure_endpoint"],
79
79
  api_key=self._model_config.get("api_key", None), # Default-style access to appease linters.
80
- api_version=self._model_config.get("api_version", DEFAULT_AOAI_API_VERSION),
80
+ api_version=DEFAULT_AOAI_API_VERSION, # Force a known working version
81
81
  azure_deployment=self._model_config.get("azure_deployment", ""),
82
82
  )
83
83
  from openai import OpenAI
@@ -4,7 +4,7 @@
4
4
  from typing import Any, Dict, Union, List
5
5
 
6
6
  from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
7
- from openai.types.eval_create_params import TestingCriterionLabelModel
7
+ from openai.types.graders import LabelModelGrader
8
8
  from azure.ai.evaluation._common._experimental import experimental
9
9
 
10
10
  from .aoai_grader import AzureOpenAIGrader
@@ -55,7 +55,7 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):
55
55
  passing_labels: List[str],
56
56
  **kwargs: Any
57
57
  ):
58
- grader = TestingCriterionLabelModel(
58
+ grader = LabelModelGrader(
59
59
  input=input,
60
60
  labels=labels,
61
61
  model=model,
@@ -5,7 +5,7 @@ from typing import Any, Dict, Union
5
5
  from typing_extensions import Literal
6
6
 
7
7
  from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
8
- from openai.types.eval_string_check_grader import EvalStringCheckGrader
8
+ from openai.types.graders import StringCheckGrader
9
9
  from azure.ai.evaluation._common._experimental import experimental
10
10
 
11
11
  from .aoai_grader import AzureOpenAIGrader
@@ -55,7 +55,7 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
55
55
  reference: str,
56
56
  **kwargs: Any
57
57
  ):
58
- grader = EvalStringCheckGrader(
58
+ grader = StringCheckGrader(
59
59
  input=input,
60
60
  name=name,
61
61
  operation=operation,
@@ -5,7 +5,7 @@ from typing import Any, Dict, Union
5
5
  from typing_extensions import Literal
6
6
 
7
7
  from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
8
- from openai.types.eval_text_similarity_grader import EvalTextSimilarityGrader
8
+ from openai.types.graders import TextSimilarityGrader
9
9
  from azure.ai.evaluation._common._experimental import experimental
10
10
 
11
11
  from .aoai_grader import AzureOpenAIGrader
@@ -77,7 +77,7 @@ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
77
77
  name: str,
78
78
  **kwargs: Any
79
79
  ):
80
- grader = EvalTextSimilarityGrader(
80
+ grader = TextSimilarityGrader(
81
81
  evaluation_metric=evaluation_metric,
82
82
  input=input,
83
83
  pass_threshold=pass_threshold,
@@ -9,7 +9,7 @@ from . import constants
9
9
  from .rai_service import evaluate_with_rai_service
10
10
  from .utils import get_harm_severity_level
11
11
  from .evaluation_onedp_client import EvaluationServiceOneDPClient
12
- from .onedp.models import EvaluationUpload, EvaluationResult
12
+ from .onedp.models import EvaluationUpload, EvaluationResult, RedTeamUpload, ResultType
13
13
 
14
14
  __all__ = [
15
15
  "get_harm_severity_level",
@@ -18,4 +18,6 @@ __all__ = [
18
18
  "EvaluationServiceOneDPClient",
19
19
  "EvaluationResult",
20
20
  "EvaluationUpload",
21
+ "RedTeamUpload",
22
+ "ResultType",
21
23
  ]
@@ -7,7 +7,7 @@ from typing import Union, Any, Dict
7
7
  from azure.core.credentials import AzureKeyCredential, TokenCredential
8
8
  from azure.ai.evaluation._common.onedp import AIProjectClient as RestEvaluationServiceClient
9
9
  from azure.ai.evaluation._common.onedp.models import (PendingUploadRequest, PendingUploadType, EvaluationResult,
10
- ResultType, AssetCredentialRequest, EvaluationUpload, InputDataset)
10
+ ResultType, AssetCredentialRequest, EvaluationUpload, InputDataset, RedTeamUpload)
11
11
  from azure.storage.blob import ContainerClient
12
12
  from .utils import upload
13
13
 
@@ -22,7 +22,8 @@ class EvaluationServiceOneDPClient:
22
22
  **kwargs,
23
23
  )
24
24
 
25
- def create_evaluation_result(self, *, name: str, path: str, version=1, metrics: Dict[str, int]=None, **kwargs) -> EvaluationResult:
25
+ def create_evaluation_result(
26
+ self, *, name: str, path: str, version=1, metrics: Dict[str, int]=None, result_type: ResultType=ResultType.EVALUATION, **kwargs) -> EvaluationResult:
26
27
  """Create and upload evaluation results to Azure evaluation service.
27
28
 
28
29
  This method uploads evaluation results from a local path to Azure Blob Storage
@@ -39,14 +40,16 @@ class EvaluationServiceOneDPClient:
39
40
  :param version: The version number for the evaluation results, defaults to 1
40
41
  :type version: int, optional
41
42
  :param metrics: Metrics to be added to evaluation result
42
- :type version: Dict[str, int], optional
43
+ :type metrics: Dict[str, int], optional
44
+ :param result_type: Evaluation Result Type to create
45
+ :type result_type: ResultType, optional
43
46
  :param kwargs: Additional keyword arguments to pass to the underlying API calls
44
47
  :return: The response from creating the evaluation result version
45
48
  :rtype: EvaluationResult
46
49
  :raises: Various exceptions from the underlying API calls or upload process
47
50
  """
48
51
 
49
- LOGGER.debug(f"Creating evaluation result for {name} with version {version} from path {path}")
52
+ LOGGER.debug(f"Creating evaluation result for {name} with version {version} type {result_type} from path {path}")
50
53
  start_pending_upload_response = self.rest_client.evaluation_results.start_pending_upload(
51
54
  name=name,
52
55
  version=version,
@@ -63,7 +66,7 @@ class EvaluationServiceOneDPClient:
63
66
  create_version_response = self.rest_client.evaluation_results.create_or_update_version(
64
67
  body=EvaluationResult(
65
68
  blob_uri=start_pending_upload_response.blob_reference_for_consumption.blob_uri,
66
- result_type=ResultType.EVALUATION,
69
+ result_type=result_type,
67
70
  name=name,
68
71
  version=version,
69
72
  metrics=metrics,
@@ -115,4 +118,46 @@ class EvaluationServiceOneDPClient:
115
118
  **kwargs
116
119
  )
117
120
 
121
+ return update_run_response
122
+
123
+ def start_red_team_run(self, *, red_team: RedTeamUpload, **kwargs):
124
+ """Start a new red team run in the Azure evaluation service.
125
+
126
+ This method creates a new red team run with the provided configuration details.
127
+
128
+ :param red_team: The red team configuration to upload
129
+ :type red_team: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
130
+ :param kwargs: Additional keyword arguments to pass to the underlying API calls
131
+ :return: The created red team run object
132
+ :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
133
+ :raises: Various exceptions from the underlying API calls
134
+ """
135
+ upload_run_response = self.rest_client.red_teams.upload_run(
136
+ redteam=red_team,
137
+ **kwargs
138
+ )
139
+
140
+ return upload_run_response
141
+
142
+ def update_red_team_run(self, *, name: str, red_team: RedTeamUpload, **kwargs):
143
+ """Update an existing red team run in the Azure evaluation service.
144
+
145
+ This method updates a red team run with new information such as status changes,
146
+ result references, or other metadata.
147
+
148
+ :param name: The identifier of the red team run to update
149
+ :type name: str
150
+ :param red_team: The updated red team configuration
151
+ :type red_team: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
152
+ :param kwargs: Additional keyword arguments to pass to the underlying API calls
153
+ :return: The updated red team run object
154
+ :rtype: ~azure.ai.evaluation._common.onedp.models.RedTeamUpload
155
+ :raises: Various exceptions from the underlying API calls
156
+ """
157
+ update_run_response = self.rest_client.red_teams.upload_update_run(
158
+ name=name,
159
+ redteam=red_team,
160
+ **kwargs
161
+ )
162
+
118
163
  return update_run_response
@@ -2101,8 +2101,10 @@ class EvaluationsOperations:
2101
2101
 
2102
2102
  if _stream:
2103
2103
  deserialized = response.iter_bytes()
2104
- else:
2104
+ elif type(response.json()) == list:
2105
2105
  deserialized = _deserialize(List[Dict[str, Any]], response.json())
2106
+ else:
2107
+ deserialized = _deserialize(Dict[str, Any], response.json())
2106
2108
 
2107
2109
  if cls:
2108
2110
  return cls(pipeline_response, deserialized, {}) # type: ignore
@@ -4267,7 +4269,7 @@ class RedTeamsOperations:
4267
4269
  if isinstance(redteam, (IOBase, bytes)):
4268
4270
  _content = redteam
4269
4271
  else:
4270
- _content = json.dumps(redteam, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
4272
+ _content = json.dumps(redteam, cls=SdkJSONEncoder, exclude_readonly=False) # type: ignore
4271
4273
 
4272
4274
  _request = build_red_teams_upload_update_run_request(
4273
4275
  name=name,
@@ -629,8 +629,9 @@ async def evaluate_with_rai_service(
629
629
  :type data: dict
630
630
  :param metric_name: The evaluation metric to use.
631
631
  :type metric_name: str
632
- :param project_scope: The Azure AI project scope details.
633
- :type project_scope: Dict
632
+ :param project_scope: The Azure AI project, which can either be a string representing the project endpoint
633
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
634
+ :type project_scope: Union[str, AzureAIProject]
634
635
  :param credential: The Azure authentication credential.
635
636
  :type credential: ~azure.core.credentials.TokenCredential
636
637
  :param annotation_task: The annotation task to use.
@@ -777,11 +778,11 @@ async def evaluate_with_rai_service_multimodal(
777
778
  :type messages: str
778
779
  :param metric_name: The evaluation metric to use.
779
780
  :type metric_name: str
780
- :param project_scope: The Azure AI project scope details.
781
- :type project_scope: Dict
781
+ :param project_scope: The Azure AI project, which can either be a string representing the project endpoint
782
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
783
+ :type project_scope: Union[str, AzureAIProject]
782
784
  :param credential: The Azure authentication credential.
783
- :type credential:
784
- ~azure.core.credentials.TokenCredential
785
+ :type credential: ~azure.core.credentials.TokenCredential
785
786
  :return: The parsed annotation result.
786
787
  :rtype: List[List[Dict]]
787
788
  """
@@ -1,21 +1,17 @@
1
1
  import json
2
+ from abc import abstractmethod
2
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
3
4
 
5
+ from azure.ai.projects import __version__ as projects_version
4
6
  from azure.ai.projects import AIProjectClient
5
- from azure.ai.projects.models import (
6
- ThreadRun,
7
- RunStep,
8
- RunStepToolCallDetails,
9
- FunctionDefinition,
10
- ListSortOrder,
11
- )
12
7
 
13
8
  from typing import List, Union
14
9
 
15
10
  from azure.ai.evaluation._common._experimental import experimental
11
+ from packaging.version import Version
16
12
 
17
13
  # Constants.
18
- from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION
14
+ from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION, _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS
19
15
 
20
16
  # Message instances.
21
17
  from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall
@@ -26,61 +22,20 @@ from ._models import ToolDefinition, EvaluatorData
26
22
  # Utilities.
27
23
  from ._models import break_tool_call_into_messages, convert_message
28
24
 
29
- # Maximum items to fetch in a single AI Services API call (imposed by the service).
30
- _AI_SERVICES_API_MAX_LIMIT = 100
31
-
32
- # Maximum number of workers allowed to make API calls at the same time.
33
- _MAX_WORKERS = 10
34
-
35
- # Constants to only be used internally in this file for the built-in tools.
36
- _CODE_INTERPRETER = "code_interpreter"
37
- _BING_GROUNDING = "bing_grounding"
38
- _FILE_SEARCH = "file_search"
39
-
40
- # Built-in tool descriptions and parameters are hidden, but we include basic descriptions
41
- # for evaluation purposes.
42
- _BUILT_IN_DESCRIPTIONS = {
43
- _CODE_INTERPRETER: "Use code interpreter to read and interpret information from datasets, "
44
- + "generate code, and create graphs and charts using your data. Supports "
45
- + "up to 20 files.",
46
- _BING_GROUNDING: "Enhance model output with web data.",
47
- _FILE_SEARCH: "Search for data across uploaded files.",
48
- }
49
-
50
- # Built-in tool parameters are hidden, but we include basic parameters for evaluation purposes.
51
- _BUILT_IN_PARAMS = {
52
- _CODE_INTERPRETER: {
53
- "type": "object",
54
- "properties": {"input": {"type": "string", "description": "Generated code to be executed."}},
55
- },
56
- _BING_GROUNDING: {
57
- "type": "object",
58
- "properties": {"requesturl": {"type": "string", "description": "URL used in Bing Search API."}},
59
- },
60
- _FILE_SEARCH: {
61
- "type": "object",
62
- "properties": {
63
- "ranking_options": {
64
- "type": "object",
65
- "properties": {
66
- "ranker": {"type": "string", "description": "Ranking algorithm to use."},
67
- "score_threshold": {"type": "number", "description": "Threshold for search results."},
68
- },
69
- "description": "Ranking options for search results.",
70
- }
71
- },
72
- },
73
- }
74
25
 
75
26
  @experimental
76
27
  class AIAgentConverter:
77
28
  """
78
- A converter for AI agent data.
29
+ A converter for AI agent data. Data retrieval classes handle getting agent data depending on
30
+ agent version.
79
31
 
80
32
  :param project_client: The AI project client used for API interactions.
81
33
  :type project_client: AIProjectClient
82
34
  """
83
35
 
36
+ # Maximum number of workers allowed to make API calls at the same time.
37
+ _MAX_WORKERS = 10
38
+
84
39
  def __init__(self, project_client: AIProjectClient):
85
40
  """
86
41
  Initializes the AIAgentConverter with the given AI project client.
@@ -89,30 +44,16 @@ class AIAgentConverter:
89
44
  :type project_client: AIProjectClient
90
45
  """
91
46
  self.project_client = project_client
47
+ self._data_retriever = AIAgentConverter._get_data_retriever(project_client=project_client)
92
48
 
93
- def _list_messages_chronological(self, thread_id: str):
94
- """
95
- Lists messages in chronological order for a given thread.
96
-
97
- :param thread_id: The ID of the thread.
98
- :type thread_id: str
99
- :return: A list of messages in chronological order.
100
- """
101
- to_return = []
102
-
103
- has_more = True
104
- after = None
105
- while has_more:
106
- messages = self.project_client.agents.list_messages(
107
- thread_id=thread_id, limit=_AI_SERVICES_API_MAX_LIMIT, order=ListSortOrder.ASCENDING, after=after
108
- )
109
- has_more = messages.has_more
110
- after = messages.last_id
111
- if messages.data:
112
- # We need to add the messages to the accumulator.
113
- to_return.extend(messages.data)
114
-
115
- return to_return
49
+ @staticmethod
50
+ def _get_data_retriever(project_client: AIProjectClient):
51
+ if project_client is None:
52
+ return None
53
+ if Version(projects_version) > Version("1.0.0b10"):
54
+ return FDPAgentDataRetriever(project_client=project_client)
55
+ else:
56
+ return LegacyAgentDataRetriever(project_client=project_client)
116
57
 
117
58
  def _list_tool_calls_chronological(self, thread_id: str, run_id: str) -> List[ToolCall]:
118
59
  """
@@ -127,29 +68,14 @@ class AIAgentConverter:
127
68
  """
128
69
  # This is the other API request that we need to make to AI service, such that we can get the details about
129
70
  # the tool calls and results. Since the list is given in reverse chronological order, we need to reverse it.
130
- run_steps_chronological: List[RunStep] = []
131
- has_more = True
132
- after = None
133
- while has_more:
134
- run_steps = self.project_client.agents.list_run_steps(
135
- thread_id=thread_id,
136
- run_id=run_id,
137
- limit=_AI_SERVICES_API_MAX_LIMIT,
138
- order=ListSortOrder.ASCENDING,
139
- after=after,
140
- )
141
- has_more = run_steps.has_more
142
- after = run_steps.last_id
143
- if run_steps.data:
144
- # We need to add the run steps to the accumulator.
145
- run_steps_chronological.extend(run_steps.data)
71
+ run_steps_chronological = self._data_retriever._list_run_steps_chronological(thread_id=thread_id, run_id=run_id)
146
72
 
147
73
  # Let's accumulate the function calls in chronological order. Function calls
148
74
  tool_calls_chronological: List[ToolCall] = []
149
75
  for run_step_chronological in run_steps_chronological:
150
76
  if run_step_chronological.type != _TOOL_CALLS:
151
77
  continue
152
- step_details: RunStepToolCallDetails = run_step_chronological.step_details
78
+ step_details: object = run_step_chronological.step_details
153
79
  if step_details.type != _TOOL_CALLS:
154
80
  continue
155
81
  if len(step_details.tool_calls) < 1:
@@ -166,26 +92,13 @@ class AIAgentConverter:
166
92
 
167
93
  return tool_calls_chronological
168
94
 
169
- def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
170
- """
171
- Lists run IDs in chronological order for a given thread.
172
-
173
- :param thread_id: The ID of the thread.
174
- :type thread_id: str
175
- :return: A list of run IDs in chronological order.
176
- :rtype: List[str]
177
- """
178
- runs = self.project_client.agents.list_runs(thread_id=thread_id, order=ListSortOrder.ASCENDING)
179
- run_ids = [run["id"] for run in runs["data"]]
180
- return run_ids
181
-
182
95
  @staticmethod
183
- def _extract_function_tool_definitions(thread_run: ThreadRun) -> List[ToolDefinition]:
96
+ def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinition]:
184
97
  """
185
98
  Extracts tool definitions from a thread run.
186
99
 
187
100
  :param thread_run: The thread run containing tool definitions.
188
- :type thread_run: ThreadRun
101
+ :type thread_run: object
189
102
  :return: A list of tool definitions extracted from the thread run.
190
103
  :rtype: List[ToolDefinition]
191
104
  """
@@ -202,6 +115,7 @@ class AIAgentConverter:
202
115
 
203
116
  final_tools.append(
204
117
  ToolDefinition(
118
+ type="function",
205
119
  name=tool_function.name,
206
120
  description=tool_function.description,
207
121
  parameters=parameters,
@@ -213,6 +127,7 @@ class AIAgentConverter:
213
127
  if tool.type in _BUILT_IN_DESCRIPTIONS and tool.type in _BUILT_IN_PARAMS:
214
128
  final_tools.append(
215
129
  ToolDefinition(
130
+ type=tool.type,
216
131
  name=tool.type,
217
132
  description=_BUILT_IN_DESCRIPTIONS[tool.type],
218
133
  parameters=_BUILT_IN_PARAMS[tool.type],
@@ -406,12 +321,12 @@ class AIAgentConverter:
406
321
  # We set the include_run_id to False, since we don't want to include the current run's tool calls, which
407
322
  # are already included in the previous step.
408
323
  run_ids_up_to_run_id = AIAgentConverter._filter_run_ids_up_to_run_id(
409
- self._list_run_ids_chronological(thread_id), run_id, include_run_id=False
324
+ self._data_retriever._list_run_ids_chronological(thread_id), run_id, include_run_id=False
410
325
  )
411
326
 
412
327
  # Since each _list_tool_calls_chronological call is expensive, we can use a thread pool to speed
413
328
  # up the process by parallelizing the AI Services API requests.
414
- with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
329
+ with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
415
330
  futures = {
416
331
  executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id
417
332
  for run_id in run_ids_up_to_run_id
@@ -437,7 +352,7 @@ class AIAgentConverter:
437
352
  """
438
353
  to_return: List[Message] = []
439
354
 
440
- with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
355
+ with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
441
356
  futures = {executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id for run_id in run_ids}
442
357
  for future in as_completed(futures):
443
358
  to_return.extend(future.result())
@@ -498,10 +413,10 @@ class AIAgentConverter:
498
413
  :rtype: dict
499
414
  """
500
415
  # Make the API call once and reuse the result.
501
- thread_run: ThreadRun = self.project_client.agents.get_run(thread_id=thread_id, run_id=run_id)
416
+ thread_run: object = self._data_retriever._get_run(thread_id=thread_id, run_id=run_id)
502
417
 
503
418
  # Walk through the "user-facing" conversation history and start adding messages.
504
- chronological_conversation = self._list_messages_chronological(thread_id)
419
+ chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)
505
420
 
506
421
  # Since this is Xth run of out possibly N runs, we are only interested is messages that are before the run X.
507
422
  chrono_until_run_id = AIAgentConverter._filter_messages_up_to_run_id(chronological_conversation, run_id)
@@ -557,14 +472,14 @@ class AIAgentConverter:
557
472
  list_of_run_evaluations: List[dict] = []
558
473
 
559
474
  # These are all the run IDs.
560
- run_ids = self._list_run_ids_chronological(thread_id)
475
+ run_ids = self._data_retriever._list_run_ids_chronological(thread_id)
561
476
 
562
477
  # If there were no messages in the thread, we can return an empty list.
563
478
  if len(run_ids) < 1:
564
479
  return list_of_run_evaluations
565
480
 
566
481
  # These are all the messages.
567
- chronological_conversation = self._list_messages_chronological(thread_id)
482
+ chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)
568
483
 
569
484
  # If there are no messages in the thread, we can return an empty list.
570
485
  if len(chronological_conversation) < 1:
@@ -574,7 +489,7 @@ class AIAgentConverter:
574
489
  all_sorted_tool_calls = AIAgentConverter._sort_messages(self._retrieve_all_tool_calls(thread_id, run_ids))
575
490
 
576
491
  # The last run should have all the tool definitions.
577
- thread_run = self.project_client.agents.get_run(thread_id=thread_id, run_id=run_ids[-1])
492
+ thread_run = self._data_retriever._get_run(thread_id=thread_id, run_id=run_ids[-1])
578
493
  instructions = thread_run.instructions
579
494
 
580
495
  # So then we can get the tool definitions.
@@ -647,7 +562,7 @@ class AIAgentConverter:
647
562
  return self._prepare_single_thread_evaluation_data(thread_id=thread_ids, filename=filename)
648
563
 
649
564
  evaluations = []
650
- with ThreadPoolExecutor(max_workers=_MAX_WORKERS) as executor:
565
+ with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
651
566
  # We override the filename, because we don't want to write the file for each thread, having to handle
652
567
  # threading issues and file being opened from multiple threads, instead, we just want to write it once
653
568
  # at the end.
@@ -802,3 +717,132 @@ class AIAgentConverter:
802
717
  data = json.load(file)
803
718
 
804
719
  return AIAgentConverter._convert_from_conversation(data, run_id)
720
+
721
+ @experimental
722
+ class AIAgentDataRetriever:
723
+ # Maximum items to fetch in a single AI Services API call (imposed by the service).
724
+ _AI_SERVICES_API_MAX_LIMIT = 100
725
+
726
+ def __init__(self, project_client: AIProjectClient):
727
+ """
728
+ Initializes the AIAgentDataRetriever with the given AI project client.
729
+
730
+ :param project_client: The AI project client used for API interactions.
731
+ :type project_client: AIProjectClient
732
+ """
733
+ self.project_client = project_client
734
+
735
+ @abstractmethod
736
+ def _get_run(self, thread_id: str, run_id: str):
737
+ pass
738
+
739
+ @abstractmethod
740
+ def _list_messages_chronological(self, thread_id: str):
741
+ pass
742
+
743
+ @abstractmethod
744
+ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
745
+ pass
746
+
747
+ @abstractmethod
748
+ def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
749
+ pass
750
+
751
+ @experimental
752
+ class LegacyAgentDataRetriever(AIAgentDataRetriever):
753
+
754
+ def __init__(self, **kwargs):
755
+ super(LegacyAgentDataRetriever, self).__init__(**kwargs)
756
+
757
+ def _list_messages_chronological(self, thread_id: str):
758
+ """
759
+ Lists messages in chronological order for a given thread.
760
+
761
+ :param thread_id: The ID of the thread.
762
+ :type thread_id: str
763
+ :return: A list of messages in chronological order.
764
+ """
765
+ to_return = []
766
+
767
+ has_more = True
768
+ after = None
769
+ while has_more:
770
+ messages = self.project_client.agents.list_messages(
771
+ thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc", after=after)
772
+ has_more = messages.has_more
773
+ after = messages.last_id
774
+ if messages.data:
775
+ # We need to add the messages to the accumulator.
776
+ to_return.extend(messages.data)
777
+
778
+ return to_return
779
+
780
+ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
781
+ run_steps_chronological: List[object] = []
782
+ has_more = True
783
+ after = None
784
+ while has_more:
785
+ run_steps = self.project_client.agents.list_run_steps(
786
+ thread_id=thread_id,
787
+ run_id=run_id,
788
+ limit=self._AI_SERVICES_API_MAX_LIMIT,
789
+ order="asc",
790
+ after=after,
791
+ )
792
+ has_more = run_steps.has_more
793
+ after = run_steps.last_id
794
+ if run_steps.data:
795
+ # We need to add the run steps to the accumulator.
796
+ run_steps_chronological.extend(run_steps.data)
797
+ return run_steps_chronological
798
+
799
+ def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
800
+ """
801
+ Lists run IDs in chronological order for a given thread.
802
+
803
+ :param thread_id: The ID of the thread.
804
+ :type thread_id: str
805
+ :return: A list of run IDs in chronological order.
806
+ :rtype: List[str]
807
+ """
808
+ runs = self.project_client.agents.list_runs(thread_id=thread_id, order="asc")
809
+ run_ids = [run["id"] for run in runs["data"]]
810
+ return run_ids
811
+
812
+ def _get_run(self, thread_id: str, run_id: str):
813
+ return self.project_client.agents.get_run(thread_id=thread_id, run_id=run_id)
814
+
815
+ @experimental
816
+ class FDPAgentDataRetriever(AIAgentDataRetriever):
817
+
818
+ def __init__(self, **kwargs):
819
+ super(FDPAgentDataRetriever, self).__init__(**kwargs)
820
+
821
+ def _list_messages_chronological(self, thread_id: str):
822
+ """
823
+ Lists messages in chronological order for a given thread.
824
+
825
+ :param thread_id: The ID of the thread.
826
+ :type thread_id: str
827
+ :return: A list of messages in chronological order.
828
+ """
829
+ message_iter = self.project_client.agents.messages.list(
830
+ thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc"
831
+ )
832
+ return [message for message in message_iter]
833
+
834
+ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
835
+
836
+ return self.project_client.agents.run_steps.list(
837
+ thread_id=thread_id,
838
+ run_id=run_id,
839
+ limit=self._AI_SERVICES_API_MAX_LIMIT,
840
+ order="asc"
841
+ )
842
+
843
+ def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
844
+ runs = self.project_client.agents.runs.list(thread_id=thread_id, order="asc")
845
+ return [run.id for run in runs]
846
+
847
+ def _get_run(self, thread_id: str, run_id: str):
848
+ return self.project_client.agents.runs.get(thread_id=thread_id, run_id=run_id)