azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,899 @@
import json
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed

from azure.ai.projects import __version__ as projects_version
from azure.ai.projects import AIProjectClient

from typing import List, Union

from azure.ai.evaluation._common._experimental import experimental
from packaging.version import Version

# Constants.
from ._models import (
    _USER,
    _AGENT,
    _TOOL,
    _TOOL_CALL,
    _TOOL_CALLS,
    _FUNCTION,
    _BUILT_IN_DESCRIPTIONS,
    _BUILT_IN_PARAMS,
    _OPENAPI,
    OpenAPIToolDefinition,
)

# Message instances.
from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall

# Intermediate definitions to hold results.
from ._models import ToolDefinition, EvaluatorData

# Utilities.
from ._models import break_tool_call_into_messages, convert_message


@experimental
class AIAgentConverter:
    """
    A converter for AI agent data. Data retrieval classes handle getting agent data depending on
    agent version.

    :param project_client: The AI project client used for API interactions.
    :type project_client: AIProjectClient
    """

    # Maximum number of workers allowed to make API calls at the same time.
    _MAX_WORKERS = 10

    def __init__(self, project_client: AIProjectClient):
        """
        Initializes the AIAgentConverter with the given AI project client.

        :param project_client: The AI project client used for API interactions.
        :type project_client: AIProjectClient
        """
        self.project_client = project_client
        self._data_retriever = AIAgentConverter._get_data_retriever(project_client=project_client)

    @staticmethod
    def _get_data_retriever(project_client: AIProjectClient):
        if project_client is None:
            return None
        if Version(projects_version) > Version("1.0.0b10"):
            return FDPAgentDataRetriever(project_client=project_client)
        else:
            return LegacyAgentDataRetriever(project_client=project_client)

    def _list_tool_calls_chronological(self, thread_id: str, run_id: str) -> List[ToolCall]:
        """
        Lists tool calls in chronological order for a given thread and run.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: A list of tool calls in chronological order.
        :rtype: List[ToolCall]
        """
        # This is the other API request that we need to make to AI service, such that we can get the details about
        # the tool calls and results. Since the list is given in reverse chronological order, we need to reverse it.
        run_steps_chronological = self._data_retriever._list_run_steps_chronological(thread_id=thread_id, run_id=run_id)

        # Let's accumulate the function calls in chronological order. Function calls
        tool_calls_chronological: List[ToolCall] = []
        for run_step_chronological in run_steps_chronological:
            if run_step_chronological.type != _TOOL_CALLS:
                continue
            step_details: object = run_step_chronological.step_details
            if step_details.type != _TOOL_CALLS:
                continue
            if len(step_details.tool_calls) < 1:
                continue
            for tool_call in step_details.tool_calls:
                # We need to add the tool call and the result as two separate messages.
                tool_calls_chronological.append(
                    ToolCall(
                        created=run_step_chronological.created_at,
                        completed=run_step_chronological.completed_at,
                        details=tool_call,
                    )
                )

        return tool_calls_chronological

    @staticmethod
    def _extract_function_tool_definitions(thread_run: object) -> List[Union[ToolDefinition, OpenAPIToolDefinition]]:
        """
        Extracts tool definitions from a thread run.

        :param thread_run: The thread run containing tool definitions.
        :type thread_run: object
        :return: A list of tool definitions extracted from the thread run.
        :rtype: List[ToolDefinition]
        """
        final_tools: List[ToolDefinition] = []
        for tool in thread_run.tools:
            # Here we handle the custom functions and create tool definitions out of them.
            if tool.type == _FUNCTION:
                tool_function: FunctionDefinition = tool.function
                parameters = tool_function.parameters

                # The target schema doesn't support required fields, so we omit it for now.
                if "required" in parameters:
                    del parameters["required"]

                final_tools.append(
                    ToolDefinition(
                        type="function",
                        name=tool_function.name,
                        description=tool_function.description,
                        parameters=parameters,
                    )
                )
            elif tool.type == _OPENAPI:
                openapi_tool = tool.openapi
                tool_definition = OpenAPIToolDefinition(
                    name=openapi_tool.name,
                    description=openapi_tool.description,
                    type=_OPENAPI,
                    spec=openapi_tool.spec,
                    auth=openapi_tool.auth.as_dict(),
                    default_params=openapi_tool.default_params.as_dict() if openapi_tool.default_params else None,
                    functions=[
                        ToolDefinition(
                            name=func.get("name"),
                            description=func.get("description"),
                            parameters=func.get("parameters"),
                            type="function",
                        )
                        for func in openapi_tool.get("functions")
                    ],
                )
                final_tools.append(tool_definition)
            else:
                # Add limited support for built-in tools. Descriptions and parameters
                # are not published, but we'll include placeholders.
                if tool.type in _BUILT_IN_DESCRIPTIONS and tool.type in _BUILT_IN_PARAMS:
                    final_tools.append(
                        ToolDefinition(
                            type=tool.type,
                            name=tool.type,
                            description=_BUILT_IN_DESCRIPTIONS[tool.type],
                            parameters=_BUILT_IN_PARAMS[tool.type],
                        )
                    )
        return final_tools

    @staticmethod
    def _break_into_query_responses(messages: List[Message], run_id: str) -> (List[Message], List[Message]):
        """
        Breaks a list of messages into query and response messages based on the run ID.

        :param messages: The list of messages to be broken into query and response.
        :type messages: List[Message]
        :param run_id: The ID of the run to distinguish response messages.
        :type run_id: str
        :return: A tuple containing two lists - the first list contains query messages, and the second list contains response messages.
        :rtype: (List[Message], List[Message])
        """
        query: List[Message] = [what for what in messages if what.run_id != run_id]
        responses: List[Message] = [what for what in messages if what.run_id == run_id]
        return query, responses

    @staticmethod
    def _filter_run_ids_up_to_run_id(run_ids: List[str], run_id: str, include_run_id: bool = True) -> List[str]:
        """
        Filters run IDs up to a specific run ID.

        This method processes a list of run IDs and filters out run IDs that come after the specified run ID.
        It ensures that only run IDs up to and including the specified run ID are included in the result.

        :param run_ids: The list of run IDs in chronological order.
        :type run_ids: List[str]
        :param run_id: The ID of the run to filter messages up to.
        :type run_id: str
        :return: The filtered list of run IDs up to the specified run ID.
        :rtype: List[str]
        """
        for index, single_run_id in enumerate(run_ids):
            # Since this is the conversation of the entire thread and we are interested in a given run, we need to
            # filter out the messages that came after the run.
            if single_run_id == run_id:
                if include_run_id:
                    return run_ids[: index + 1]
                return run_ids[:index]

        # If we didn't find the run_id, we return an empty list.
        return []

    @staticmethod
    def _filter_messages_up_to_run_id(
        chronological_messages, run_id: str, include_run_id: bool = True
    ) -> List[Message]:
        """
        Filters messages up to a specific run ID.

        This method processes a list of messages in chronological order and filters out messages that come after the specified run ID.
        It ensures that only messages up to and including the specified run ID are included in the result.

        :param chronological_messages: The list of messages in chronological order.
        :type chronological_messages: List[Message]
        :param run_id: The ID of the run to filter messages up to.
        :type run_id: str
        :return: The filtered list of messages up to the specified run ID.
        :rtype: List[Message]
        """
        filtered_messages = []
        in_my_current_run = False
        for single_turn in chronological_messages:
            # Since this is the conversation of the entire thread and we are interested in a given run, we need to
            # filter out the messages that came after the run.
            if single_turn.run_id is not None:
                if single_turn.run_id == run_id:
                    in_my_current_run = True

                    # If we entered our current run and its the run that we have requested to filter up to, but
                    # not including, we can break out of the loop.
                    if not include_run_id:
                        break

                # Then, if we think that we are currently in our run and we have a message that is not from our run,
                # it means that we have left our run.
                if in_my_current_run and single_turn.run_id != run_id:
                    break

            # We're good to add it.
            filtered_messages.append(single_turn)

        return filtered_messages

    @staticmethod
    def _extract_typed_messages(ai_services_messages) -> List[Message]:
        """
        Extracts and converts AI service messages to a list of typed Message objects.

        This method processes a list of messages from the AI service, converting them into
        appropriate Message subclass instances (UserMessage, AssistantMessage) based on their role.
        It filters out messages without content and handles different message roles accordingly.

        :param ai_services_messages: A list of messages from the AI service.
        :type ai_services_messages: _models.OpenAIPageableListOfThreadMessage (some internal type from ai projects)
        :return: A list of typed Message objects.
        :rtype: List[Message]
        """
        # We will collect messages in this accumulator.
        final_messages: List[Message] = []

        # Each visible message in the conversation is a message from the user or the assistant, we collect
        # both the text and timestamp, so we can recreate the chronological order.
        for single_turn in ai_services_messages:
            # This shouldn't really happen, ever. What's the point of a message without content? But to avoid a nasty
            # crash on one of the historical messages, let's check for it and bail out from this iteration.
            if len(single_turn.content) < 1:
                continue

            content_list = []
            # If content is a list, process all content items.
            for content_item in single_turn.content:
                if content_item.type == "text":
                    content_list.append(
                        {
                            "type": "text",
                            "text": content_item.text.value,
                        }
                    )
                elif content_item.type == "image":
                    content_list.append(
                        {
                            "type": "image",
                            "image": {
                                "file_id": content_item.image_file.file_id,
                            },
                        }
                    )

            # If we have a user message, then we save it as such and since it's a human message, there is no
            # run_id associated with it.
            if single_turn.role == _USER:
                final_messages.append(UserMessage(content=content_list, createdAt=single_turn.created_at))
                continue

            # In this case, we have an assistant message. Unfortunately, this would only have the user-facing
            # agent's response, without any details on what tool was called, with what parameters, and what
            # the result was. That will be added later in the method.
            if single_turn.role == _AGENT:
                # We are required to put the run_id in the assistant message.
                final_messages.append(
                    AssistantMessage(content=content_list, run_id=single_turn.run_id, createdAt=single_turn.created_at)
                )
                continue

        return final_messages

    def _fetch_tool_calls(self, thread_id: str, run_id: str) -> List[Message]:
        """
        Fetches tool calls for a given thread and run, and converts them into messages.

        This method retrieves tool calls for a specified thread and run, converts them into messages using the
        `break_tool_call_into_messages` utility function, and returns the list of messages.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: A list of messages converted from tool calls.
        :rtype: List[Message]
        """
        tool_calls: List[Message] = []
        for chrono_tool_call in self._list_tool_calls_chronological(thread_id, run_id):
            tool_calls.extend(break_tool_call_into_messages(chrono_tool_call, run_id))
        return tool_calls

    def _retrieve_tool_calls_up_to_including_run_id(
        self, thread_id: str, run_id: str, exclude_tool_calls_previous_runs: bool = False
    ) -> List[Message]:
        """
        Converts tool calls to messages for a given thread and run.

        This method retrieves tool calls for a specified thread and run, converts them into messages,
        and optionally includes tool calls from previous runs.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :param exclude_tool_calls_previous_runs: Whether to exclude tool calls from previous runs in the conversion. Default is False.
        :type exclude_tool_calls_previous_runs: bool
        :return: A list of messages converted from tool calls.
        :rtype: List[Message]
        """
        to_return: List[Message] = []

        # Add all the tool calls and results of this run as messages.
        for tool_call in self._list_tool_calls_chronological(thread_id, run_id):
            # We need to add the tool call and the result as two separate messages.
            to_return.extend(break_tool_call_into_messages(tool_call, run_id))

        # We also request to add all the tool calls and results of the previous runs into the chat history. This is
        # a bit of an expensive operation, but the requirement is to support this functionality, even at the penalty
        # in latency in performance. New agents api is to include these details cheaply through a single API call in
        # list_messages, but until that is available, we need to do this. User can also opt-out of this functionality
        # by setting the exclude_tool_calls_previous_runs flag to True.
        if not exclude_tool_calls_previous_runs:
            # These are all the assistant (any number) in the thread.
            # We set the include_run_id to False, since we don't want to include the current run's tool calls, which
            # are already included in the previous step.
            run_ids_up_to_run_id = AIAgentConverter._filter_run_ids_up_to_run_id(
                self._data_retriever._list_run_ids_chronological(thread_id), run_id, include_run_id=False
            )

            # Since each _list_tool_calls_chronological call is expensive, we can use a thread pool to speed
            # up the process by parallelizing the AI Services API requests.
            with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
                futures = {
                    executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id
                    for run_id in run_ids_up_to_run_id
                }
                for future in as_completed(futures):
                    to_return.extend(future.result())

        return to_return

    def _retrieve_all_tool_calls(self, thread_id: str, run_ids: List[str]) -> List[Message]:
        """
        Converts all tool calls to messages for a given thread and list of run IDs.

        This method retrieves tool calls for a specified thread and list of run IDs, converts them into messages,
        and returns the list of messages.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_ids: The list of run IDs.
        :type run_ids: List[str]
        :return: A list of messages converted from tool calls.
        :rtype: List[Message]
        """
        to_return: List[Message] = []

        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
            futures = {executor.submit(self._fetch_tool_calls, thread_id, run_id): run_id for run_id in run_ids}
            for future in as_completed(futures):
                to_return.extend(future.result())

        return to_return

    @staticmethod
    def _is_agent_tool_call(message: Message) -> bool:
        """
        Determines if a message is an agent tool call.

        :param message: The message to be checked.
        :type message: Message
        :return: True if the message is an agent tool call, False otherwise.
        :rtype: bool
        """
        return (
            message.role == _AGENT  # Any other agent that this run's.
            and isinstance(message.content, list)  # Content is of expected type.
            and len(message.content) > 0  # There are messages/calls/results present.
            and "type" in message.content[0]  # Being safe here.
            and message.content[0]["type"] == _TOOL_CALL  # Not interested in assistant's toolcalls.
        )

    @staticmethod
    def _sort_messages(messages: List[Message]) -> List[Message]:
        """
        Sorts a list of messages, placing messages with `createdAt` set to None at the beginning.

        :param messages: The list of messages to be sorted.
        :type messages: List[Message]
        :return: The sorted list of messages.
        :rtype: List[Message]
        """
        # Separate messages with createdAt set to None
        none_created_at = [message for message in messages if message.createdAt is None]

        # Filter out messages with createdAt set to None and sort the remaining messages
        sorted_messages = sorted(
            [message for message in messages if message.createdAt is not None],
            key=lambda x: (x.createdAt, x.role == _AGENT),
        )

        # Combine the lists, placing messages with None createdAt at the beginning
        return none_created_at + sorted_messages

    def convert(self, thread_id: str, run_id: str, exclude_tool_calls_previous_runs: bool = False) -> dict:
        """
        Converts the agent run to a format suitable for the OpenAI API.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param run_id: The ID of the run.
        :type run_id: str
        :param exclude_tool_calls_previous_runs: Whether to exclude tool calls from previous runs in the conversion.
        :type exclude_tool_calls_previous_runs: bool
        :return: The converted data in dictionary format.
        :rtype: dict
        """
        # Make the API call once and reuse the result.
        thread_run: object = self._data_retriever._get_run(thread_id=thread_id, run_id=run_id)

        # Walk through the "user-facing" conversation history and start adding messages.
        chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)

        # Since this is Xth run of out possibly N runs, we are only interested is messages that are before the run X.
        chrono_until_run_id = AIAgentConverter._filter_messages_up_to_run_id(chronological_conversation, run_id)

        # Messages are now still in hidden AI services' type, so to get finer control over our typing, we need to
        # convert the message to a friendly schema.
        final_messages = AIAgentConverter._extract_typed_messages(chrono_until_run_id)

        # Third, add all the tool calls and results as messages.
        final_messages.extend(
            self._retrieve_tool_calls_up_to_including_run_id(thread_id, run_id, exclude_tool_calls_previous_runs)
        )

        # All of our final messages have to be in chronological order. We use a secondary sorting key,
        # since the tool_result and assistant events would come with the same timestamp, so we need to
        # sort them by role, such that the assistant's message would come after the tool result it's sending.
        final_messages = AIAgentConverter._sort_messages(final_messages)

        # Finally, we want to force the system message to be the first one in the list.
        # First, we need to create the first system message of the thread.
        instructions = thread_run.instructions
        if instructions:
            # The system message will have a string content.
            final_messages.insert(0, SystemMessage(content=instructions))

        # We need to collect all the messages that are not the current run's response.
        query, responses = AIAgentConverter._break_into_query_responses(final_messages, run_id)

        # Collect it into the final result and dump it to JSON.
        final_result = EvaluatorData(
            query=query,
            response=responses,
            tool_definitions=AIAgentConverter._extract_function_tool_definitions(thread_run),
        )

        return json.loads(final_result.to_json())

    def _prepare_single_thread_evaluation_data(self, thread_id: str, filename: str = None) -> List[dict]:
        """
        Prepares evaluation data for a given thread and optionally writes it to a file.

        This method retrieves all run IDs and messages for the specified thread, processes them to create evaluation data,
        and optionally writes the evaluation data to a JSONL file. The evaluation data includes query and response messages
        as well as tool definitions.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :param filename: The name of the file to write the evaluation data to. If None, the data is not written to a file.
        :type filename: str, optional
        :return: A list of evaluation data dictionaries.
        :rtype: List[dict]
        """
        list_of_run_evaluations: List[dict] = []

        # These are all the run IDs.
        run_ids = self._data_retriever._list_run_ids_chronological(thread_id)

        # If there were no messages in the thread, we can return an empty list.
        if len(run_ids) < 1:
            return list_of_run_evaluations

        # These are all the messages.
        chronological_conversation = self._data_retriever._list_messages_chronological(thread_id)

        # If there are no messages in the thread, we can return an empty list.
        if len(chronological_conversation) < 1:
            return list_of_run_evaluations

        # These are all the tool calls.
        all_sorted_tool_calls = AIAgentConverter._sort_messages(self._retrieve_all_tool_calls(thread_id, run_ids))

        # The last run should have all the tool definitions.
        thread_run = self._data_retriever._get_run(thread_id=thread_id, run_id=run_ids[-1])
        instructions = thread_run.instructions

        # So then we can get the tool definitions.
        tool_definitions = AIAgentConverter._extract_function_tool_definitions(thread_run)

        # Now, we create a new evaluator object for each run.
        for run_id in run_ids:
            # We need to filter out the messages that are not from the current run.
            simple_messages = AIAgentConverter._filter_messages_up_to_run_id(chronological_conversation, run_id)

            # Now we need to convert from OpenAI's general ThreadMessage model into our Azure Agents models.
            typed_simple_messages = AIAgentConverter._extract_typed_messages(simple_messages)

            # We also need to filter out the tool calls that are not from the current run.
            sorted_tool_calls = AIAgentConverter._filter_messages_up_to_run_id(all_sorted_tool_calls, run_id)

            # Build the big list.
            this_runs_messages = []
            this_runs_messages.extend(typed_simple_messages)
            this_runs_messages.extend(sorted_tool_calls)

            # Sort it, so it looks nicely in chronological order.
            this_runs_messages = AIAgentConverter._sort_messages(this_runs_messages)

            # If we have a system message, we need to put it at the top of the list.
            if instructions:
                # The system message will have a string content.
                this_runs_messages.insert(0, SystemMessage(content=instructions))

            # Since now we have the messages in the expected order, we need to break them into the query and
            # responses.
            query, responses = AIAgentConverter._break_into_query_responses(this_runs_messages, run_id)

            # Finally, let's pack it up into the final result.
            final_result = EvaluatorData(
                query=query,
                response=responses,
                tool_definitions=tool_definitions,
            )

            # Add it to the list of evaluations.
            list_of_run_evaluations.append(json.loads(final_result.to_json()))

        # So, if we have the filename, we can write it to the file, which is expected to be a JSONL file.
        if filename:
            with open(filename, mode="a", encoding="utf-8") as file:
                for evaluation in list_of_run_evaluations:
                    file.write(json.dumps(evaluation) + "\n")

        # We always return the list of evaluations, even if we didn't or did write it to a file.
        return list_of_run_evaluations

    def prepare_evaluation_data(self, thread_ids=Union[str, List[str]], filename: str = None) -> List[dict]:
        """
        Prepares evaluation data for a given thread or list of threads and optionally writes it to a file.

        This method retrieves all run IDs and messages for the specified thread(s), processes them to create evaluation data,
        and optionally writes the evaluation data to a JSONL file. The evaluation data includes query and response messages
        as well as tool definitions.

        :param thread_ids: The ID(s) of the thread(s). Can be a single thread ID or a list of thread IDs.
        :type thread_ids: Union[str, List[str]]
        :param filename: The name of the file to write the evaluation data to. If None, the data is not written to a file.
        :type filename: str, optional
        :return: A list of evaluation data dictionaries.
        :rtype: List[dict]
        """
        # Single instance, pretty much the same as the list.
        if isinstance(thread_ids, str):
            return self._prepare_single_thread_evaluation_data(thread_id=thread_ids, filename=filename)

        evaluations = []
        with ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
            # We override the filename, because we don't want to write the file for each thread, having to handle
            # threading issues and file being opened from multiple threads, instead, we just want to write it once
            # at the end.
            futures = {
                executor.submit(self._prepare_single_thread_evaluation_data, str(thread_id), None): thread_id
                for thread_id in thread_ids
            }
            for future in as_completed(futures):
                evaluations.extend(future.result())

        # So, if we have the filename, we can write it to the file, which is expected to be a JSONL file.
        if filename:
            with open(filename, mode="a", encoding="utf-8") as file:
                for evaluation in evaluations:
                    file.write(json.dumps(evaluation) + "\n")

        return evaluations

    @staticmethod
    def _run_ids_from_conversation(conversation: dict) -> List[str]:
        """
        Extracts a list of unique run IDs from a conversation dictionary.

        :param conversation: The conversation dictionary containing messages.
        :type conversation: dict
        :return: A list of unique run IDs in the order they appear.
        :rtype: List[str]
        """
        if not isinstance(conversation, dict) or "messages" not in conversation:
            return []
        run_ids_with_repetitions = [message["run_id"] for message in conversation["messages"] if "run_id" in message]
        # Removes duplicates, requires Python 3.7+ to ensure order is preserved
        run_ids = list(dict.fromkeys(run_ids_with_repetitions))
        return run_ids

    @staticmethod
    def _convert_from_conversation(
        conversation: dict, run_id: str, exclude_tool_calls_previous_runs: bool = False
    ) -> dict:
        """
        Converts the agent run from a conversation dictionary object (a loaded thread) to a format suitable for the OpenAI API.

        :param conversation: The conversation dictionary object.
            The expected schema for the conversation dictionary is as follows:
            {
                "messages": [
                    {
                        "role": str,  # The role of the message sender, e.g., "user", "assistant", "tool".
                        "content": list,  # A list of content dictionaries.
                        "run_id": str,  # The ID of the run associated with the message.
                        "createdAt": str,  # The timestamp when the message was created.
                        ...
                    },
                    ...
                ],
                "tools": [
                    {
                        "name": str,  # The name of the tool.
                        "description": str,  # The description of the tool.
                        "parameters": dict  # The parameters for the tool.
                    },
                    ...
                ]
            }
        :type conversation: dict
        :param run_id: The ID of the run.
        :type run_id: str
        :param exclude_tool_calls_previous_runs: Whether to exclude tool calls from previous runs in the conversion.
        :type exclude_tool_calls_previous_runs: bool
        :return: The converted data in dictionary format serialized as string.
        :rtype: dict
        """
        # We need to type our messages to the correct type, so we can sliced and dice the way we like it.
        messages: List[dict] = conversation.get("messages", [])
        converted_messages: List[Message] = [convert_message(message) for message in messages]

        # Accumulate the messages in the correct order, but only up to the run_id.
        final_messages: List[Message] = []
        for converted_message in AIAgentConverter._filter_messages_up_to_run_id(converted_messages, run_id):
            # By default, we want to add all the messages, even if we are on the 10th run of the thread, we want to know
            # what the assistant said, what the assistant called, and what the result was.
            if exclude_tool_calls_previous_runs:
                # We would not be interested in tool call messages in the query, unless it's the current run id.
                if converted_message.run_id != run_id:
                    # Anything with tool, we can throw out, since we don't care about the tooling of possibly other agents
                    # that came before the run we're interested in.
                    if converted_message.role == _TOOL:
                        continue

                    # We also don't want anything that is an assistant calling a tool.
                    if AIAgentConverter._is_agent_tool_call(converted_message):
                        continue

            # We're good to add it.
            final_messages.append(converted_message)

        # Just in case, sort them all out by putting the messages without createdAt, like SystemMessage's at the
        # top of the list, so they appear first.
        final_messages = AIAgentConverter._sort_messages(final_messages)

        # Create the tool definitions.
        tools = conversation.get("tools", [])
        tool_definitions = [
            ToolDefinition(name=tool["name"], description=tool.get("description"), parameters=tool["parameters"])
            for tool in tools
        ]

        # Separate into the chat history, with all other user-assistant messages, and the assistant's response, where
        # the latter would include
        query, responses = AIAgentConverter._break_into_query_responses(final_messages, run_id)

        # Create the final result
        final_result = EvaluatorData(query=query, response=responses, tool_definitions=tool_definitions)

        return json.loads(final_result.to_json())

    @staticmethod
    def _convert_from_file(filename: str, run_id: str) -> dict:
        """
        Converts the agent run from a JSON file to a format suitable for the OpenAI API, the JSON file being a thread.

        :param filename: The path to the JSON file.
            The expected schema for the JSON file is as follows:
            {
                "messages": [
                    {
                        "role": str,  # The role of the message sender, e.g., "user", "assistant", "tool".
                        "content": list,  # A list of content dictionaries.
                        "run_id": str,  # The ID of the run associated with the message.
                        "createdAt": str,  # The timestamp when the message was created.
                        ...
                    },
                    ...
                ],
                "tools": [
                    {
                        "name": str,  # The name of the tool.
                        "description": str,  # The description of the tool.
                        "parameters": dict  # The parameters for the tool.
                    },
                    ...
                ]
            }
        :type filename: str
        :param run_id: The ID of the run.
        :type run_id: str
        :return: The converted data in dictionary format serialized as string.
        :rtype: dict
        """

        with open(filename, mode="r", encoding="utf-8") as file:
            data = json.load(file)

        return AIAgentConverter._convert_from_conversation(data, run_id)


@experimental
class AIAgentDataRetriever:
    # Maximum items to fetch in a single AI Services API call (imposed by the service).
    _AI_SERVICES_API_MAX_LIMIT = 100

    def __init__(self, project_client: AIProjectClient):
        """
        Initializes the AIAgentDataRetriever with the given AI project client.

        :param project_client: The AI project client used for API interactions.
        :type project_client: AIProjectClient
        """
        self.project_client = project_client

    @abstractmethod
    def _get_run(self, thread_id: str, run_id: str):
        pass

    @abstractmethod
    def _list_messages_chronological(self, thread_id: str):
        pass

    @abstractmethod
    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
        pass

    @abstractmethod
    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
        pass


@experimental
class LegacyAgentDataRetriever(AIAgentDataRetriever):

    def __init__(self, **kwargs):
        super(LegacyAgentDataRetriever, self).__init__(**kwargs)

    def _list_messages_chronological(self, thread_id: str):
        """
        Lists messages in chronological order for a given thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of messages in chronological order.
        """
        to_return = []

        has_more = True
        after = None
        while has_more:
            messages = self.project_client.agents.list_messages(
                thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc", after=after
            )
            has_more = messages.has_more
            after = messages.last_id
            if messages.data:
                # We need to add the messages to the accumulator.
                to_return.extend(messages.data)

        return to_return

    def _list_run_steps_chronological(self, thread_id: str, run_id: str):
        run_steps_chronological: List[object] = []
        has_more = True
        after = None
        while has_more:
            run_steps = self.project_client.agents.list_run_steps(
                thread_id=thread_id,
                run_id=run_id,
                limit=self._AI_SERVICES_API_MAX_LIMIT,
                order="asc",
                after=after,
                include=["step_details.tool_calls[*].file_search.results[*].content"],
            )
            has_more = run_steps.has_more
            after = run_steps.last_id
            if run_steps.data:
                # We need to add the run steps to the accumulator.
                run_steps_chronological.extend(run_steps.data)
        return run_steps_chronological

    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
        """
        Lists run IDs in chronological order for a given thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of run IDs in chronological order.
        :rtype: List[str]
        """
        runs = self.project_client.agents.list_runs(thread_id=thread_id, order="asc")
        run_ids = [run["id"] for run in runs["data"]]
        return run_ids

    def _get_run(self, thread_id: str, run_id: str):
        return self.project_client.agents.get_run(thread_id=thread_id, run_id=run_id)


@experimental
class FDPAgentDataRetriever(AIAgentDataRetriever):

    def __init__(self, **kwargs):
        super(FDPAgentDataRetriever, self).__init__(**kwargs)

    def _list_messages_chronological(self, thread_id: str):
        """
        Lists messages in chronological order for a given thread.

        :param thread_id: The ID of the thread.
        :type thread_id: str
        :return: A list of messages in chronological order.
        """
        message_iter = self.project_client.agents.messages.list(
            thread_id=thread_id, limit=self._AI_SERVICES_API_MAX_LIMIT, order="asc"
        )
        return [message for message in message_iter]

    def _list_run_steps_chronological(self, thread_id: str, run_id: str):

        return self.project_client.agents.run_steps.list(
            thread_id=thread_id,
            run_id=run_id,
            limit=self._AI_SERVICES_API_MAX_LIMIT,
            order="asc",
            include=["step_details.tool_calls[*].file_search.results[*].content"],
        )

    def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
        runs = self.project_client.agents.runs.list(thread_id=thread_id, order="asc")
        return [run.id for run in runs]

    def _get_run(self, thread_id: str, run_id: str):
        return self.project_client.agents.runs.get(thread_id=thread_id, run_id=run_id)