azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0

azure/ai/evaluation/simulator/_simulator.py

```diff
@@ -1,5 +1,5 @@
 # flake8: noqa
-# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611
+# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0114,R0913,E0702,R0903,C0411
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
@@ -7,19 +7,22 @@ import asyncio
 import importlib.resources as pkg_resources
 import json
 import os
+import random
 import re
 import warnings
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union, Tuple

-from
-from promptflow.core import AzureOpenAIModelConfiguration, Flow
+from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
 from tqdm import tqdm

-from
-from .
-from .
+from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.utils import construct_prompty_model_config
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration

-
+from .._exceptions import ErrorBlame, ErrorCategory, EvaluationException
+from .._user_agent import UserAgentSingleton
+from ._conversation.constants import ConversationRole
+from ._helpers import ConversationHistory, Turn
 from ._utils import JsonLineChatProtocol


@@ -27,53 +30,82 @@ from ._utils import JsonLineChatProtocol
 class Simulator:
     """
     Simulator for generating synthetic conversations.
+
+    :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration.
+    :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration]
+    :raises ValueError: If the model_config does not contain the required keys or any value is None.
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/evaluation_samples_simulate.py
+            :start-after: [START nonadversarial_simulator]
+            :end-before: [END nonadversarial_simulator]
+            :language: python
+            :dedent: 8
+            :caption: Run a Simulator for 2 queries and 4 conversation turns.
     """

-    def __init__(self,
-
-
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+        self._validate_model_config(model_config)
+        self.model_config = model_config
+        if "api_version" not in self.model_config:
+            self.model_config["api_version"] = "2024-06-01"  # type: ignore

-
-
-
-        :paramtype credential: Optional[Any]
-        :raises ValueError: If the azure_ai_project does not contain the required keys or any value is None.
-        """
-        self._validate_project_config(azure_ai_project)
-        self.azure_ai_project = azure_ai_project
-        self.azure_ai_project["api_version"] = "2024-02-15-preview"
-        self.credential = credential
+    @staticmethod
+    def __user_agent() -> str:
+        return f"{UserAgentSingleton().value} (type=simulator; subtype=Simulator)"

     @staticmethod
-    def
+    def _validate_model_config(model_config: Any):
         """
-        Validates the
+        Validates the model_config to ensure all required keys are present and have non-None values.
+        If 'type' is not specified, it will attempt to infer the type based on the keys present.

-        :param
-        :type
+        :param model_config: The model configuration dictionary.
+        :type model_config: Dict[str, Any]
         :raises ValueError: If required keys are missing or any of the values are None.
         """
-
-        if not
-
-
-
+        # Attempt to infer 'type' if not provided
+        if "type" not in model_config:
+            if "azure_deployment" in model_config and "azure_endpoint" in model_config:
+                model_config["type"] = "azure_openai"
+            elif "model" in model_config:
+                model_config["type"] = "openai"
+            else:
+                raise ValueError(
+                    "Unable to infer 'type' from model_config. Please specify 'type' as 'azure_openai' or 'openai'."
+                )
+
+        if model_config["type"] == "azure_openai":
+            required_keys = ["azure_deployment", "azure_endpoint"]
+        elif model_config["type"] == "openai":
+            required_keys = ["api_key", "model"]
+        else:
+            raise ValueError("model_config 'type' must be 'azure_openai' or 'openai'.")
+
+        missing_keys = [key for key in required_keys if key not in model_config]
+        if missing_keys:
+            raise ValueError(f"model_config is missing required keys: {', '.join(missing_keys)}")
+        none_keys = [key for key in required_keys if model_config.get(key) is None]
+        if none_keys:
+            raise ValueError(f"The following keys in model_config must not be None: {', '.join(none_keys)}")

-    # @monitor_task_simulator
     async def __call__(
         self,
         *,
         target: Callable,
         max_conversation_turns: int = 5,
-        tasks: List[
+        tasks: List[str] = [],
         text: str = "",
         num_queries: int = 5,
         query_response_generating_prompty: Optional[str] = None,
         user_simulator_prompty: Optional[str] = None,
         api_call_delay_sec: float = 1,
-
-
-        conversation_turns: List[List[str]] = [],
+        query_response_generating_prompty_options: Dict[str, Any] = {},
+        user_simulator_prompty_options: Dict[str, Any] = {},
+        conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [],
+        concurrent_async_tasks: int = 5,
+        randomization_seed: Optional[int] = None,
         **kwargs,
     ) -> List[JsonLineChatProtocol]:
         """
@@ -95,12 +127,18 @@ class Simulator:
         :paramtype user_simulator_prompty: Optional[str]
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
-        :keyword
-        :paramtype
-        :keyword
-        :paramtype
+        :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty.
+        :paramtype query_response_generating_prompty_options: Dict[str, Any]
+        :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_options: Dict[str, Any]
         :keyword conversation_turns: Predefined conversation turns to simulate.
-        :paramtype conversation_turns: List[List[str]]
+        :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]]
+        :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
+            Defaults to 5.
+        :paramtype concurrent_async_tasks: int
+        :keyword randomization_seed: The seed used to randomize task/query order. If unset, the system's
+            default seed is used. Defaults to None.
+        :paramtype randomization_seed: Optional[int]
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]

@@ -109,79 +147,75 @@ class Simulator:

         Modes:
         - Task-Free Mode: When only num_queries is specified and tasks is not, the method generates num_queries x max_conversation_turns lines of simulated data grounded in the context of the text.
-        - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines
+        - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines will be simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used.
         - Conversation Starter Mode: When conversation_turns are specified, the method starts each conversation with the user-specified queries and then follows the conversation history for the remaining turns.
         """
         if conversation_turns and (text or tasks):
             raise ValueError("Cannot specify both conversation_turns and text/tasks")

-        if num_queries > len(tasks):
+        if text and num_queries > len(tasks):
             warnings.warn(
                 f"You have specified 'num_queries' > len('tasks') ({num_queries} > {len(tasks)}). "
                 f"All tasks will be used for generation and the remaining {num_queries - len(tasks)} lines will be simulated in task-free mode"
             )
-        elif num_queries < len(tasks):
+        elif text and num_queries < len(tasks):
             warnings.warn(
                 f"You have specified 'num_queries' < len('tasks') ({num_queries} < {len(tasks)}). "
                 f"Only the first {num_queries} lines of the specified tasks will be simulated."
             )
-            num_queries = min(num_queries, len(tasks))
-        max_conversation_turns *= 2  # account for both user and assistant turns

-
+        # Apply randomization to tasks if seed is provided
+        if randomization_seed is not None and tasks:
+            # Create a local random instance to avoid polluting global state
+            local_random = random.Random(randomization_seed)
+            tasks = tasks.copy()  # Don't modify the original list
+            local_random.shuffle(tasks)

+        max_conversation_turns *= 2  # account for both user and assistant turns
+
+        prompty_model_config = self.model_config
         if conversation_turns:
             return await self._simulate_with_predefined_turns(
                 target=target,
                 max_conversation_turns=max_conversation_turns,
                 conversation_turns=conversation_turns,
                 user_simulator_prompty=user_simulator_prompty,
-
+                user_simulator_prompty_options=user_simulator_prompty_options,
                 api_call_delay_sec=api_call_delay_sec,
                 prompty_model_config=prompty_model_config,
+                concurrent_async_tasks=concurrent_async_tasks,
             )

         query_responses = await self._generate_query_responses(
             text=text,
             num_queries=num_queries,
             query_response_generating_prompty=query_response_generating_prompty,
-
+            query_response_generating_prompty_options=query_response_generating_prompty_options,
             prompty_model_config=prompty_model_config,
             **kwargs,
         )
-
         return await self._create_conversations_from_query_responses(
             query_responses=query_responses,
             max_conversation_turns=max_conversation_turns,
             tasks=tasks,
             user_simulator_prompty=user_simulator_prompty,
-
+            user_simulator_prompty_options=user_simulator_prompty_options,
             target=target,
             api_call_delay_sec=api_call_delay_sec,
+            text=text,
         )

-    def _build_prompty_model_config(self) -> Dict[str, Any]:
-        """
-        Constructs the configuration for the prompty model.
-
-        :return: A dictionary containing the prompty model configuration, including API version and user agent headers if applicable.
-        :rtype: Dict[str, Any]
-        """
-        config = {"configuration": self.azure_ai_project}
-        if USER_AGENT and isinstance(self.azure_ai_project, AzureOpenAIModelConfiguration):
-            config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}})
-        return config
-
     async def _simulate_with_predefined_turns(
         self,
         *,
         target: Callable,
         max_conversation_turns: int,
-        conversation_turns: List[List[str]],
+        conversation_turns: List[List[Union[str, Dict[str, Any]]]],
         user_simulator_prompty: Optional[str],
-
+        user_simulator_prompty_options: Dict[str, Any],
         api_call_delay_sec: float,
-        prompty_model_config:
+        prompty_model_config: Any,
+        concurrent_async_tasks: int,
     ) -> List[JsonLineChatProtocol]:
         """
         Simulates conversations using predefined conversation turns.
@@ -191,54 +225,81 @@ class Simulator:
         :keyword max_conversation_turns: Maximum number of turns for the simulation.
         :paramtype max_conversation_turns: int
         :keyword conversation_turns: A list of predefined conversation turns.
-        :paramtype conversation_turns: List[List[str]]
+        :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]]
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
         :paramtype user_simulator_prompty: Optional[str]
-        :keyword
-        :paramtype
+        :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_options: Dict[str, Any]
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
         :keyword prompty_model_config: The configuration for the prompty model.
-        :paramtype prompty_model_config:
+        :paramtype prompty_model_config: Any
+        :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
+        :paramtype concurrent_async_tasks: int
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
-        simulated_conversations = []
         progress_bar = tqdm(
             total=int(len(conversation_turns) * (max_conversation_turns / 2)),
             desc="Simulating with predefined conversation turns: ",
             ncols=100,
             unit="messages",
         )
-
-
-
-
-
-        current_simulation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        semaphore = asyncio.Semaphore(concurrent_async_tasks)
+        progress_bar_lock = asyncio.Lock()
+
+        async def run_simulation(simulation: List[Union[str, Dict[str, Any]]]) -> JsonLineChatProtocol:
+            async with semaphore:
+                current_simulation = ConversationHistory()
+                for simulated_turn in simulation:
+                    if isinstance(simulated_turn, str):
+                        user_turn = Turn(role=ConversationRole.USER, content=simulated_turn)
+                    elif isinstance(simulated_turn, dict):
+                        user_turn = Turn(
+                            role=ConversationRole.USER,
+                            content=str(simulated_turn.get("content")),
+                            context=str(simulated_turn.get("context")),
+                        )
+                    else:
+                        raise ValueError(
+                            "Each simulated turn must be a string or a dict with 'content' and 'context' keys"
+                        )
+                    current_simulation.add_to_history(user_turn)
+                    assistant_response, assistant_context = await self._get_target_response(
+                        target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
+                    )
+                    assistant_turn = Turn(
+                        role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context
+                    )
+                    current_simulation.add_to_history(assistant_turn)
+                    async with progress_bar_lock:
+                        progress_bar.update(1)
+
+                if len(current_simulation) < max_conversation_turns:
+                    await self._extend_conversation_with_simulator(
+                        current_simulation=current_simulation,
+                        max_conversation_turns=max_conversation_turns,
+                        user_simulator_prompty=user_simulator_prompty,
+                        user_simulator_prompty_options=user_simulator_prompty_options,
+                        api_call_delay_sec=api_call_delay_sec,
+                        prompty_model_config=prompty_model_config,
+                        target=target,
+                        progress_bar=progress_bar,
+                        progress_bar_lock=progress_bar_lock,
+                    )
+                return JsonLineChatProtocol(
+                    {
+                        "messages": current_simulation.to_list(),
+                        "finish_reason": ["stop"],
+                        "context": {},
+                        "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+                    }
                 )

-
-
+        tasks = [asyncio.create_task(run_simulation(simulation)) for simulation in conversation_turns]
+        results = await asyncio.gather(*tasks)
         progress_bar.close()
-        return
+        return results

     async def _extend_conversation_with_simulator(
         self,
@@ -246,11 +307,12 @@ class Simulator:
         current_simulation: ConversationHistory,
         max_conversation_turns: int,
         user_simulator_prompty: Optional[str],
-
+        user_simulator_prompty_options: Dict[str, Any],
         api_call_delay_sec: float,
         prompty_model_config: Dict[str, Any],
         target: Callable,
         progress_bar: tqdm,
+        progress_bar_lock: asyncio.Lock,
     ):
         """
         Extends an ongoing conversation using a user simulator until the maximum number of turns is reached.
@@ -261,8 +323,8 @@ class Simulator:
         :paramtype max_conversation_turns: int,
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
         :paramtype user_simulator_prompty: Optional[str],
-        :keyword
-        :paramtype
+        :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_options: Dict[str, Any],
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float,
         :keyword prompty_model_config: The configuration for the prompty model.
@@ -271,68 +333,92 @@ class Simulator:
         :paramtype target: Callable,
         :keyword progress_bar: Progress bar for tracking simulation progress.
         :paramtype progress_bar: tqdm,
+        :keyword progress_bar_lock: Lock for updating the progress bar safely.
+        :paramtype progress_bar_lock: asyncio.Lock
         """
         user_flow = self._load_user_simulation_flow(
-            user_simulator_prompty=user_simulator_prompty,
+            user_simulator_prompty=user_simulator_prompty,  # type: ignore
            prompty_model_config=prompty_model_config,
-
+            user_simulator_prompty_options=user_simulator_prompty_options,
         )

         while len(current_simulation) < max_conversation_turns:
-            user_response_content = user_flow(
-                task="Continue the conversation",
+            user_response_content = await user_flow(
+                task="Continue the conversation",
+                conversation_history=current_simulation.to_context_free_list(),
+                **user_simulator_prompty_options,
             )
             user_response = self._parse_prompty_response(response=user_response_content)
             user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
             current_simulation.add_to_history(user_turn)
             await asyncio.sleep(api_call_delay_sec)
-            assistant_response = await self._get_target_response(
+            assistant_response, assistant_context = await self._get_target_response(
                 target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
             )
-            assistant_turn = Turn(
+            assistant_turn = Turn(
+                role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context
+            )
             current_simulation.add_to_history(assistant_turn)
-
+            async with progress_bar_lock:
+                progress_bar.update(1)

     def _load_user_simulation_flow(
         self,
         *,
-        user_simulator_prompty: Union[str, os.PathLike],
+        user_simulator_prompty: Optional[Union[str, os.PathLike]],
         prompty_model_config: Dict[str, Any],
-
-    ) ->
+        user_simulator_prompty_options: Dict[str, Any],
+    ) -> "AsyncPrompty":  # type: ignore
         """
         Loads the flow for simulating user interactions.

         :keyword user_simulator_prompty: Path to the user simulator prompty file.
-        :paramtype user_simulator_prompty: Union[str, os.PathLike]
+        :paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]]
         :keyword prompty_model_config: The configuration for the prompty model.
         :paramtype prompty_model_config: Dict[str, Any]
-        :keyword
-        :paramtype
+        :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_options: Dict[str, Any]
         :return: The loaded flow for simulating user interactions.
-        :rtype:
+        :rtype: AsyncPrompty
         """
         if not user_simulator_prompty:
             package = "azure.ai.evaluation.simulator._prompty"
             resource_name = "task_simulate.prompty"
             try:
                 # Access the resource as a file path
+                # pylint: disable=deprecated-method
                 with pkg_resources.path(package, resource_name) as prompty_path:
-
+                    prompty_model_config = construct_prompty_model_config(
+                        model_config=prompty_model_config,  # type: ignore
+                        default_api_version="2024-06-01",
+                        user_agent=self.__user_agent(),
+                    )
+                    return AsyncPrompty.load(source=prompty_path, model=prompty_model_config)  # type: ignore
             except FileNotFoundError as e:
-
-
+                msg = f"Flow path for {resource_name} does not exist in package {package}."
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
+                    blame=ErrorBlame.USER_ERROR,
+                ) from e
+        prompty_model_config = construct_prompty_model_config(
+            model_config=prompty_model_config,  # type: ignore
+            default_api_version="2024-06-01",
+            user_agent=self.__user_agent(),
+        )
+        return AsyncPrompty.load(
             source=user_simulator_prompty,
             model=prompty_model_config,
-            **
-        )
+            **user_simulator_prompty_options,
+        )  # type: ignore

     def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]:
         """
         Parses the response from the prompty execution.

         :keyword response: The raw response from the prompty.
-        :paramtype
+        :paramtype response: str
         :return: A dictionary representing the parsed response content.
         :rtype: Dict[str, Any]
         :raises ValueError: If the response cannot be parsed.
@@ -372,8 +458,8 @@ class Simulator:
         text: str,
         num_queries: int,
         query_response_generating_prompty: Optional[str],
-
-        prompty_model_config:
+        query_response_generating_prompty_options: Dict[str, Any],
+        prompty_model_config: Any,
         **kwargs,
     ) -> List[Dict[str, str]]:
         """
@@ -385,25 +471,32 @@ class Simulator:
         :paramtype num_queries: int
         :keyword query_response_generating_prompty: Path to the query response generating prompty file.
         :paramtype query_response_generating_prompty: Optional[str]
-        :keyword
-        :paramtype
+        :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty.
+        :paramtype query_response_generating_prompty_options: Dict[str, Any]
         :keyword prompty_model_config: The configuration for the prompty model.
-        :paramtype prompty_model_config:
+        :paramtype prompty_model_config: Any
         :return: A list of query-response dictionaries.
         :rtype: List[Dict[str, str]]
         :raises RuntimeError: If an error occurs during query generation.
         """
         query_flow = self._load_query_generation_flow(
-            query_response_generating_prompty=query_response_generating_prompty,
+            query_response_generating_prompty=query_response_generating_prompty,  # type: ignore
             prompty_model_config=prompty_model_config,
-
+            query_response_generating_prompty_options=query_response_generating_prompty_options,
         )
-
         try:
-            query_responses = query_flow(text=text, num_queries=num_queries)
+            query_responses = await query_flow(text=text, num_queries=num_queries)
             if isinstance(query_responses, dict):
                 keys = list(query_responses.keys())
                 return query_responses[keys[0]]
+            if isinstance(query_responses, str):
+                query_responses = json.loads(query_responses)
+                if isinstance(query_responses, dict):
+                    if len(query_responses.keys()) == 1:
+                        return query_responses[list(query_responses.keys())[0]]
+                    return query_responses  # type: ignore
+                if isinstance(query_responses, list):
+                    return query_responses
             return json.loads(query_responses)
         except Exception as e:
             raise RuntimeError("Error generating query responses") from e
@@ -411,47 +504,65 @@ class Simulator:
     def _load_query_generation_flow(
         self,
         *,
-        query_response_generating_prompty: Union[str, os.PathLike],
+        query_response_generating_prompty: Optional[Union[str, os.PathLike]],
         prompty_model_config: Dict[str, Any],
-
-    ) ->
+        query_response_generating_prompty_options: Dict[str, Any],
+    ) -> "AsyncPrompty":
         """
         Loads the flow for generating query responses.

         :keyword query_response_generating_prompty: Path to the query response generating prompty file.
-        :paramtype query_response_generating_prompty: Union[str, os.PathLike]
+        :paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]]
         :keyword prompty_model_config: The configuration for the prompty model.
         :paramtype prompty_model_config: Dict[str, Any]
-        :keyword
-        :paramtype
+        :keyword query_response_generating_prompty_options: Additional keyword arguments for the flow.
+        :paramtype query_response_generating_prompty_options: Dict[str, Any]
         :return: The loaded flow for generating query responses.
-        :rtype:
+        :rtype: AsyncPrompty
         """
         if not query_response_generating_prompty:
             package = "azure.ai.evaluation.simulator._prompty"
             resource_name = "task_query_response.prompty"
             try:
                 # Access the resource as a file path
+                # pylint: disable=deprecated-method
                 with pkg_resources.path(package, resource_name) as prompty_path:
-
+                    prompty_model_config = construct_prompty_model_config(
+                        model_config=prompty_model_config,  # type: ignore
+                        default_api_version="2024-06-01",
+                        user_agent=self.__user_agent(),
+                    )
+                    return AsyncPrompty.load(source=prompty_path, model=prompty_model_config)  # type: ignore
             except FileNotFoundError as e:
-
-
+                msg = f"Flow path for {resource_name} does not exist in package {package}."
+                raise EvaluationException(
+                    message=msg,
+                    internal_message=msg,
+                    error_category=ErrorCategory.FILE_OR_FOLDER_NOT_FOUND,
+                    blame=ErrorBlame.USER_ERROR,
+                ) from e
+        prompty_model_config = construct_prompty_model_config(
+            model_config=prompty_model_config,  # type: ignore
+            default_api_version="2024-06-01",
+            user_agent=self.__user_agent(),
+        )
+        return AsyncPrompty.load(
             source=query_response_generating_prompty,
             model=prompty_model_config,
-            **
-        )
+            **query_response_generating_prompty_options,
+        )  # type: ignore

     async def _create_conversations_from_query_responses(
         self,
         *,
         query_responses: List[Dict[str, str]],
         max_conversation_turns: int,
-        tasks: List[
+        tasks: List[str],
         user_simulator_prompty: Optional[str],
-
+        user_simulator_prompty_options: Dict[str, Any],
         target: Callable,
         api_call_delay_sec: float,
+        text: str,
     ) -> List[JsonLineChatProtocol]:
         """
         Creates full conversations from query-response pairs.
@@ -461,15 +572,17 @@ class Simulator:
         :keyword max_conversation_turns: The maximum number of conversation turns.
         :paramtype max_conversation_turns: int
         :keyword tasks: A list of tasks for the simulation.
-        :paramtype tasks: List[
+        :paramtype tasks: List[str]
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
         :paramtype user_simulator_prompty: Optional[str]
-        :keyword
-        :paramtype
+        :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_options: Dict[str, Any]
         :keyword target: The target function to call for responses.
         :paramtype target: Callable
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
+        :keyword text: The initial input text for generating query responses.
+        :paramtype text: str
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
@@ -486,14 +599,17 @@ class Simulator:
         for i, query_response_pair in enumerate(query_responses):
             query = query_response_pair["q"]
             response = query_response_pair["r"]
-
+            try:
+                task = tasks[i]
+            except IndexError:
+                task = None

             conversation = await self._complete_conversation(
                 conversation_starter=query,
                 max_conversation_turns=max_conversation_turns,
-                task=task,
+                task=task,  # type: ignore
                 user_simulator_prompty=user_simulator_prompty,
-
+                user_simulator_prompty_options=user_simulator_prompty_options,
                 target=target,
                 api_call_delay_sec=api_call_delay_sec,
                 progress_bar=progress_bar,
@@ -507,6 +623,7 @@ class Simulator:
                         "task": task,
                         "expected_response": response,
                         "query": query,
+                        "original_text": text,
                     },
                     "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
                 }
@@ -520,13 +637,13 @@ class Simulator:
         *,
         conversation_starter: str,
         max_conversation_turns: int,
-        task: str,
+        task: Optional[str],
         user_simulator_prompty: Optional[str],
-
+        user_simulator_prompty_options: Dict[str, Any],
         target: Callable,
         api_call_delay_sec: float,
         progress_bar: tqdm,
-    ) -> List[Dict[str, str]]:
+    ) -> List[Dict[str, Optional[str]]]:
         """
         Completes a conversation with the target model based on the conversation starter.

@@ -538,8 +655,8 @@ class Simulator:
         :paramtype task: str
         :keyword user_simulator_prompty: Path to the user simulator prompty file.
         :paramtype user_simulator_prompty: Optional[str]
-        :keyword
-        :paramtype
+        :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty.
+        :paramtype user_simulator_prompty_options: Dict[str, Any]
         :keyword target: The target function to call for responses.
         :paramtype target: Callable
         :keyword api_call_delay_sec: Delay in seconds between API calls.
@@ -547,36 +664,48 @@ class Simulator:
         :keyword progress_bar: Progress bar for tracking simulation progress.
         :paramtype progress_bar: tqdm
         :return: A list representing the conversation history with each turn's content.
-        :rtype: List[Dict[str, str]]
+        :rtype: List[Dict[str, Optional[str]]]
         """
         conversation_history = ConversationHistory()
-        # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
-        # conversation_history.add_to_history(user_turn)

         while len(conversation_history) < max_conversation_turns:
             user_flow = self._load_user_simulation_flow(
-                user_simulator_prompty=user_simulator_prompty,
-                prompty_model_config=self.
-
+                user_simulator_prompty=user_simulator_prompty,  # type: ignore
+                prompty_model_config=self.model_config,  # type: ignore
+                user_simulator_prompty_options=user_simulator_prompty_options,
             )
-
-            task
-
-
-
+            if len(conversation_history) == 0:
+                if task:
+                    conversation_starter_from_simulated_user = await user_flow(
+                        task=task,
+                        conversation_history=[
+                            {
+                                "role": "assistant",
+                                "content": conversation_starter,
+                            }
+                        ],
+                        action="rewrite the assistant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.",
+                    )
+                else:
+                    conversation_starter_from_simulated_user = {
                         "content": conversation_starter,
-                        "your_task": "Act as the user and translate the content into a user query.",
                     }
-
-
+            else:
+                conversation_starter_from_simulated_user = await user_flow(
+                    task=task,
+                    conversation_history=conversation_history.to_context_free_list(),
+                    action="Your goal is to make sure the task is completed by asking the right questions. Do not ask the same questions again.",
+                )
             if isinstance(conversation_starter_from_simulated_user, dict):
                 conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
             user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
             conversation_history.add_to_history(user_turn)
-            assistant_response = await self._get_target_response(
+            assistant_response, assistant_context = await self._get_target_response(
                 target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=conversation_history
             )
-            assistant_turn = Turn(
+            assistant_turn = Turn(
+                role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context
+            )
             conversation_history.add_to_history(assistant_turn)
             progress_bar.update(1)

@@ -585,44 +714,9 @@ class Simulator:

         return conversation_history.to_list()

-    async def _build_user_simulation_response(
-        self,
-        task: str,
-        conversation_history: List[Dict[str, Any]],
-        user_simulator_prompty: Optional[str],
-        user_simulator_prompty_kwargs: Dict[str, Any],
-    ) -> str:
-        """
-        Builds a response from the user simulator based on the current conversation history.
-
-        :param task: A string representing the task details.
-        :type task: str
-        :param conversation_history: The current conversation history as a list of dictionaries.
-        :type conversation_history: List[Dict[str, Any]]
-        :param user_simulator_prompty: Path to the user simulator prompty file.
-        :type user_simulator_prompty: Optional[str]
-        :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty.
-        :type user_simulator_prompty_kwargs: Dict[str, Any]
-        :return: The generated response content from the user simulator.
-        :rtype: str
-        :raises RuntimeError: If an error occurs during response generation.
-        """
-        user_flow = self._load_user_simulation_flow(
-            user_simulator_prompty=user_simulator_prompty,
-            prompty_model_config=self._build_prompty_model_config(),
-            user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
-        )
-
-        try:
-            response_content = user_flow(task=task, conversation_history=conversation_history)
-            user_response = self._parse_prompty_response(response=response_content)
-            return user_response["content"]
-        except Exception as e:
-            raise RuntimeError("Error building user simulation response") from e
-
     async def _get_target_response(
         self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
-    ) -> str:
+    ) -> Tuple[str, Optional[str]]:
         """
         Retrieves the response from the target callback based on the current conversation history.

@@ -632,8 +726,8 @@ class Simulator:
         :paramtype api_call_delay_sec: float
         :keyword conversation_history: The current conversation history.
         :paramtype conversation_history: ConversationHistory
-        :return: The content of the response from the target.
-        :rtype: str
+        :return: The content of the response from the target and an optional context.
+        :rtype: str, Optional[str]
         """
         response = await target(
             messages={"messages": conversation_history.to_list()},
@@ -643,4 +737,4 @@ class Simulator:
         )
         await asyncio.sleep(api_call_delay_sec)
         latest_message = response["messages"][-1]
-        return latest_message["content"]
+        return latest_message["content"], latest_message.get("context", "")  # type: ignore
```
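
For orientation, the diff above shows the reworked `Simulator` API: it is now constructed from a `model_config` dictionary (Azure OpenAI or OpenAI) rather than an `azure_ai_project`, and the target callback is awaited with the running conversation and must return the updated message list. The sketch below is a minimal, hypothetical usage example based only on the signatures visible in this diff; the endpoint, deployment name, and sample text are placeholders, and the callback's keyword arguments other than `messages` are assumptions rather than a documented contract.

```python
import asyncio

from azure.ai.evaluation.simulator import Simulator

# Placeholder Azure OpenAI configuration. Per _validate_model_config above, "type" may
# also be inferred from these keys, and "api_version" defaults to "2024-06-01" if omitted.
model_config = {
    "type": "azure_openai",
    "azure_endpoint": "https://<your-resource>.openai.azure.com",
    "azure_deployment": "<your-deployment>",
}


async def callback(messages, **kwargs):
    # Stub target: append a canned assistant reply. The simulator reads the last
    # message's "content" (and optional "context") from the returned "messages" list.
    conversation = messages["messages"]
    conversation.append({"role": "assistant", "content": "Placeholder application response."})
    return {"messages": conversation}


async def main():
    simulator = Simulator(model_config=model_config)
    outputs = await simulator(
        target=callback,
        text="Leonardo da Vinci was an Italian polymath of the High Renaissance.",
        num_queries=2,
        max_conversation_turns=2,
    )
    for line in outputs:
        print(line)


if __name__ == "__main__":
    asyncio.run(main())
```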