azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
import os
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any, Dict, List
|
|
6
6
|
from urllib.parse import urljoin, urlparse
|
|
7
|
+
import base64
|
|
8
|
+
import json
|
|
7
9
|
|
|
8
10
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
9
11
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client, get_http_client
|
|
10
12
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
11
|
-
from azure.ai.evaluation._user_agent import
|
|
13
|
+
from azure.ai.evaluation._user_agent import UserAgentSingleton
|
|
12
14
|
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
13
15
|
|
|
14
16
|
from ._identity_manager import APITokenManager
|
|
@@ -57,9 +59,11 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
57
59
|
# add a "/" at the end of the url
|
|
58
60
|
self.api_url = self.api_url.rstrip("/") + "/"
|
|
59
61
|
self.parameter_json_endpoint = urljoin(self.api_url, "simulation/template/parameters")
|
|
62
|
+
self.parameter_image_endpoint = urljoin(self.api_url, "simulation/template/parameters/image")
|
|
60
63
|
self.jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak")
|
|
61
64
|
self.simulation_submit_endpoint = urljoin(self.api_url, "simulation/chat/completions/submit")
|
|
62
65
|
self.xpia_jailbreaks_json_endpoint = urljoin(self.api_url, "simulation/jailbreak/xpia")
|
|
66
|
+
self.attack_objectives_endpoint = urljoin(self.api_url, "simulation/attackobjectives")
|
|
63
67
|
|
|
64
68
|
def _get_service_discovery_url(self):
|
|
65
69
|
bearer_token = self.token_manager.get_token()
|
|
@@ -74,14 +78,18 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
74
78
|
timeout=5,
|
|
75
79
|
)
|
|
76
80
|
if response.status_code != 200:
|
|
77
|
-
msg =
|
|
81
|
+
msg = (
|
|
82
|
+
f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
|
|
83
|
+
f"correctly, and make sure you have the necessary access permissions. "
|
|
84
|
+
f"Status code: {response.status_code}."
|
|
85
|
+
)
|
|
78
86
|
raise EvaluationException(
|
|
79
87
|
message=msg,
|
|
80
|
-
internal_message=msg,
|
|
81
88
|
target=ErrorTarget.RAI_CLIENT,
|
|
82
|
-
category=ErrorCategory.
|
|
83
|
-
blame=ErrorBlame.
|
|
89
|
+
category=ErrorCategory.PROJECT_ACCESS_ERROR,
|
|
90
|
+
blame=ErrorBlame.USER_ERROR,
|
|
84
91
|
)
|
|
92
|
+
|
|
85
93
|
base_url = urlparse(response.json()["properties"]["discoveryUrl"])
|
|
86
94
|
return f"{base_url.scheme}://{base_url.netloc}"
|
|
87
95
|
|
|
@@ -140,7 +148,7 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
140
148
|
headers = {
|
|
141
149
|
"Authorization": f"Bearer {token}",
|
|
142
150
|
"Content-Type": "application/json",
|
|
143
|
-
"User-Agent":
|
|
151
|
+
"User-Agent": UserAgentSingleton().value,
|
|
144
152
|
}
|
|
145
153
|
|
|
146
154
|
session = self._create_async_client()
|
|
@@ -162,3 +170,97 @@ class RAIClient: # pylint: disable=client-accepts-api-version-keyword
|
|
|
162
170
|
category=ErrorCategory.UNKNOWN,
|
|
163
171
|
blame=ErrorBlame.USER_ERROR,
|
|
164
172
|
)
|
|
173
|
+
|
|
174
|
+
async def get_image_data(self, path: str) -> Any:
    """Download an image from the simulation image endpoint as base64 text.

    :param path: The image path, forwarded to the service as the ``path`` query parameter.
    :type path: str
    :raises EvaluationException: If the service answers with any status code other than 200.
    :return: The response body, base64-encoded and decoded to a UTF-8 string.
    :rtype: Any
    """
    bearer_token = self.token_manager.get_token()
    request_headers = {
        "Authorization": f"Bearer {bearer_token}",
        "Content-Type": "application/json",
        "User-Agent": UserAgentSingleton().value,
    }

    http_client = self._create_async_client()
    async with http_client:
        response = await http_client.get(  # pylint: disable=unexpected-keyword-arg
            url=self.parameter_image_endpoint,
            params={"path": path},
            headers=request_headers,
        )

    if response.status_code != 200:
        # Every non-200 status is reported with the same region-availability message.
        msg = (
            "Azure safety evaluation service is not available in your current region, "
            "please go to https://aka.ms/azureaistudiosafetyeval to see which regions are supported"
        )
        raise EvaluationException(
            message=msg,
            internal_message=msg,
            target=ErrorTarget.RAI_CLIENT,
            category=ErrorCategory.UNKNOWN,
            blame=ErrorBlame.USER_ERROR,
        )

    return base64.b64encode(response.content).decode("utf-8")
|
|
211
|
+
|
|
212
|
+
async def get_attack_objectives(
    self, risk_categories: List[str], application_scenario: str = None, strategy: str = None
) -> Any:
    """Get the attack objectives for the given risk categories.

    The request filters (API version, risk types, language and, when given, the
    strategy) are sent as query parameters on the attack-objectives endpoint.

    :param risk_categories: List of risk categories to generate attack objectives for
    :type risk_categories: List[str]
    :param application_scenario: Optional description of the application scenario for context.
        Not supported yet: any truthy value raises ``NotImplementedError``.
    :type application_scenario: str
    :param strategy: Optional attack strategy to get specific objectives for
    :type strategy: str
    :raises NotImplementedError: If ``application_scenario`` is provided.
    :return: The attack objectives returned by the service, or a single placeholder
        objective if the request fails for any reason.
    :rtype: Any
    """
    # Function-scope imports keep this block self-contained; the module's file-level
    # imports do not provide these names.
    import logging
    from urllib.parse import urlencode

    if application_scenario:
        raise NotImplementedError("Application scenario is not supported yet")

    params = {
        "api-version": "2022-11-01-preview",
        "riskTypes": ",".join(risk_categories),
        "lang": "en",  # Default to English
    }

    # Add strategy parameter if provided
    if strategy:
        params["strategy"] = strategy

    # ``get`` is invoked with a URL only, so the filters must travel in the query
    # string; previously ``params`` was built but never sent.
    # NOTE(review): assumes the service reads these as query parameters - confirm
    # against the service contract.
    request_url = f"{self.attack_objectives_endpoint}?{urlencode(params)}"

    try:
        return await self.get(request_url)
    except Exception as exc:  # pylint: disable=broad-except
        # Deliberate best-effort: fall back to a placeholder response while the API
        # endpoint is not fully implemented, but leave a trace of the failure.
        logging.getLogger(__name__).warning(
            "Fetching attack objectives failed (%s); returning placeholder objectives.", exc
        )
        return [
            {
                "metadata": {"lang": "en", "target_harms": [{"risk-type": "violence", "risk-subtype": ""}]},
                "messages": [{"role": "user", "content": "Risky content"}],
                "modality": "text",
                "source": ["source"],
            }
        ]
|
|
@@ -2,25 +2,69 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
|
-
from typing import Optional
|
|
5
|
+
from typing import Dict, List, Optional, TypedDict, cast, Union
|
|
6
|
+
from ast import literal_eval
|
|
7
|
+
from typing_extensions import NotRequired
|
|
6
8
|
|
|
7
9
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
10
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
11
|
+
from azure.ai.evaluation.simulator._adversarial_scenario import AdversarialScenario
|
|
8
12
|
|
|
9
13
|
from ._rai_client import RAIClient
|
|
10
14
|
|
|
11
|
-
CONTENT_HARM_TEMPLATES_COLLECTION_KEY =
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
15
|
+
# Collection keys for the content-harm templates (going by the constant's name).
# Stored as a set, so the listing order below carries no meaning.
CONTENT_HARM_TEMPLATES_COLLECTION_KEY = {
    "adv_content_gen_grounded",
    "adv_content_gen_ungrounded",
    "adv_content_protected_material",
    "adv_conversation",
    "adv_politics",
    "adv_qa",
    "adv_rewrite",
    "adv_search",
    "adv_summarization",
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TemplateParameters(TypedDict):
|
|
29
|
+
"""Parameters used in Templates
|
|
30
|
+
|
|
31
|
+
.. note::
|
|
32
|
+
|
|
33
|
+
This type is good enough to type check, but is incorrect. It's meant to represent a dictionary with a known
|
|
34
|
+
`metadata` key (Dict[str, str]), a known `ch_template_placeholder` key (str), and an unknown number of keys
|
|
35
|
+
that map to `str` values.
|
|
36
|
+
|
|
37
|
+
In typescript, this type would be spelled:
|
|
38
|
+
|
|
39
|
+
.. code-block:: typescript
|
|
40
|
+
|
|
41
|
+
type AdversarialTemplateParameters = {
|
|
42
|
+
[key: string]: string
|
|
43
|
+
ch_template_placeholder: string
|
|
44
|
+
metadata: {[index: string]: string} # Doesn't typecheck but gets the point across
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
At time of writing, this isn't possible to express with a TypedDict. TypedDicts must be "closed" in that
|
|
48
|
+
they fully specify all the keys they can contain.
|
|
49
|
+
|
|
50
|
+
`PEP 728 – TypedDict with Typed Extra Items <https://peps.python.org/pep-0728/>` is a proposal to support
|
|
51
|
+
this, but would only be available in Python 3.13 at the earliest.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
metadata: Dict[str, str]
|
|
55
|
+
conversation_starter: str
|
|
56
|
+
ch_template_placeholder: str
|
|
57
|
+
group_of_people: NotRequired[str]
|
|
58
|
+
category: NotRequired[str]
|
|
59
|
+
target_population: NotRequired[str]
|
|
60
|
+
topic: NotRequired[str]
|
|
61
|
+
jailbreak_string: NotRequired[str]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class _CategorizedParameter(TypedDict):
    """Typed dictionary pairing a list of template parameters with two string labels.

    NOTE(review): judging by the field names, ``category`` names the group and
    ``parameters_key`` identifies where the parameters came from - confirm against
    the code that builds these entries.
    """

    parameters: List[TemplateParameters]
    category: str
    parameters_key: str
|
|
24
68
|
|
|
25
69
|
|
|
26
70
|
class ContentHarmTemplatesUtils:
|
|
@@ -85,45 +129,53 @@ class AdversarialTemplate:
|
|
|
85
129
|
:param template_parameters: The template parameters.
|
|
86
130
|
"""
|
|
87
131
|
|
|
88
|
-
def __init__(
|
|
132
|
+
def __init__(
    self,
    template_name: str,
    text: Optional[str],
    context_key: List,
    template_parameters: Optional[List[TemplateParameters]] = None,
) -> None:
    """Record the template's name, text, context keys and parameters.

    :param template_name: The name of the template.
    :param text: The template text; may be ``None``.
    :param context_key: The context key list.
    :param template_parameters: The template parameters. ``None`` or an empty list is
        stored as a new empty list.
    """
    self.template_name = template_name
    self.text = text
    self.context_key = context_key
    # Normalise a missing/empty value so the attribute is always a list.
    self.template_parameters = template_parameters if template_parameters else []
|
|
93
143
|
|
|
94
|
-
def __str__(self):
|
|
144
|
+
def __str__(self) -> str:
    """Return the literal placeholder text, independent of the instance's state."""
    placeholder = "{{ch_template_placeholder}}"
    return placeholder
|
|
96
146
|
|
|
97
147
|
|
|
98
148
|
class AdversarialTemplateHandler:
|
|
99
149
|
"""
|
|
100
|
-
|
|
150
|
+
Initialize the AdversarialTemplateHandler.
|
|
101
151
|
|
|
102
|
-
:param azure_ai_project: The Azure AI project
|
|
103
|
-
|
|
104
|
-
:
|
|
105
|
-
:
|
|
152
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
153
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
154
|
+
:type azure_ai_project: Union[str, AzureAIProject]
|
|
155
|
+
:param rai_client: The RAI client or AI Project client used for fetching parameters.
|
|
156
|
+
:type rai_client: Union[~azure.ai.evaluation.simulator._model_tools.RAIClient, ~azure.ai.evaluation._common.onedp._client.AIProjectClient]
|
|
106
157
|
"""
|
|
107
158
|
|
|
108
|
-
def __init__(
|
|
109
|
-
self
|
|
110
|
-
|
|
159
|
+
def __init__(
|
|
160
|
+
self, azure_ai_project: Union[str, AzureAIProject], rai_client: Union[RAIClient, AIProjectClient]
|
|
161
|
+
) -> None:
|
|
111
162
|
self.azure_ai_project = azure_ai_project
|
|
112
|
-
self.categorized_ch_parameters = None
|
|
163
|
+
self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None
|
|
113
164
|
self.rai_client = rai_client
|
|
114
165
|
|
|
115
|
-
async def _get_content_harm_template_collections(self, collection_key):
|
|
116
|
-
|
|
166
|
+
async def _get_content_harm_template_collections(self, collection_key: str) -> List[AdversarialTemplate]:
|
|
117
167
|
if self.categorized_ch_parameters is None:
|
|
118
|
-
categorized_parameters = {}
|
|
168
|
+
categorized_parameters: Dict[str, _CategorizedParameter] = {}
|
|
119
169
|
util = ContentHarmTemplatesUtils
|
|
120
|
-
|
|
121
|
-
|
|
170
|
+
if isinstance(self.rai_client, RAIClient):
|
|
171
|
+
parameters = await self.rai_client.get_contentharm_parameters()
|
|
172
|
+
elif isinstance(self.rai_client, AIProjectClient):
|
|
173
|
+
parameters = literal_eval(self.rai_client.red_teams.get_template_parameters())
|
|
122
174
|
|
|
123
175
|
for k in parameters.keys():
|
|
124
176
|
template_key = util.get_template_key(k)
|
|
125
177
|
categorized_parameters[template_key] = {
|
|
126
|
-
"parameters": parameters[k],
|
|
178
|
+
"parameters": cast(List[TemplateParameters], parameters[k]),
|
|
127
179
|
"category": util.get_template_category(k),
|
|
128
180
|
"parameters_key": k,
|
|
129
181
|
}
|
|
@@ -131,17 +183,29 @@ class AdversarialTemplateHandler:
|
|
|
131
183
|
|
|
132
184
|
template_category = collection_key.split("adv_")[-1]
|
|
133
185
|
|
|
186
|
+
# Handle both qa_enterprise and qa_documents mapping to qa
|
|
187
|
+
if template_category in ["qa_enterprise", "qa_documents"]:
|
|
188
|
+
template_category = "qa"
|
|
189
|
+
|
|
134
190
|
plist = self.categorized_ch_parameters
|
|
135
191
|
ch_templates = []
|
|
192
|
+
|
|
136
193
|
for key, value in plist.items():
|
|
194
|
+
# Skip enterprise templates for ADVERSARIAL_QA
|
|
195
|
+
if collection_key == AdversarialScenario.ADVERSARIAL_QA.value and "enterprise" in key:
|
|
196
|
+
continue
|
|
197
|
+
# Skip non-enterprise templates for ADVERSARIAL_QA_DOCUMENTS
|
|
198
|
+
if collection_key == AdversarialScenario.ADVERSARIAL_QA_DOCUMENTS.value and "enterprise" not in key:
|
|
199
|
+
continue
|
|
200
|
+
|
|
137
201
|
if value["category"] == template_category:
|
|
138
202
|
params = value["parameters"]
|
|
139
203
|
for p in params:
|
|
140
204
|
p.update({"ch_template_placeholder": "{{ch_template_placeholder}}"})
|
|
141
205
|
|
|
142
206
|
template = AdversarialTemplate(template_name=key, text=None, context_key=[], template_parameters=params)
|
|
143
|
-
|
|
144
207
|
ch_templates.append(template)
|
|
208
|
+
|
|
145
209
|
return ch_templates
|
|
146
210
|
|
|
147
211
|
def get_template(self, template_name: str) -> Optional[AdversarialTemplate]:
|
|
@@ -12,6 +12,8 @@ from abc import ABC, abstractmethod
|
|
|
12
12
|
from collections import deque
|
|
13
13
|
from typing import Deque, Dict, List, Optional, Union
|
|
14
14
|
from urllib.parse import urlparse
|
|
15
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
16
|
+
from ._rai_client import RAIClient
|
|
15
17
|
|
|
16
18
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
17
19
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
@@ -49,10 +51,10 @@ class LLMBase(ABC):
|
|
|
49
51
|
Base class for all LLM models.
|
|
50
52
|
"""
|
|
51
53
|
|
|
52
|
-
def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[
|
|
54
|
+
def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[Dict[str, str]] = None):
|
|
53
55
|
self.endpoint_url = endpoint_url
|
|
54
56
|
self.name = name
|
|
55
|
-
self.additional_headers = additional_headers
|
|
57
|
+
self.additional_headers = additional_headers or {}
|
|
56
58
|
self.logger = logging.getLogger(repr(self))
|
|
57
59
|
|
|
58
60
|
# Metric tracking
|
|
@@ -78,7 +80,7 @@ class LLMBase(ABC):
|
|
|
78
80
|
async def get_completion(
|
|
79
81
|
self,
|
|
80
82
|
prompt: str,
|
|
81
|
-
session: AsyncHttpPipeline,
|
|
83
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
82
84
|
**request_params,
|
|
83
85
|
) -> dict:
|
|
84
86
|
"""
|
|
@@ -100,7 +102,7 @@ class LLMBase(ABC):
|
|
|
100
102
|
async def get_all_completions(
|
|
101
103
|
self,
|
|
102
104
|
prompts: List[str],
|
|
103
|
-
session: AsyncHttpPipeline,
|
|
105
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
104
106
|
api_call_max_parallel_count: int,
|
|
105
107
|
api_call_delay_seconds: float,
|
|
106
108
|
request_error_rate_threshold: float,
|
|
@@ -120,7 +122,7 @@ class LLMBase(ABC):
|
|
|
120
122
|
async def get_conversation_completion(
|
|
121
123
|
self,
|
|
122
124
|
messages: List[dict],
|
|
123
|
-
session: AsyncHttpPipeline,
|
|
125
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
124
126
|
role: str,
|
|
125
127
|
**request_params,
|
|
126
128
|
) -> dict:
|
|
@@ -208,7 +210,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
208
210
|
*,
|
|
209
211
|
endpoint_url: str,
|
|
210
212
|
name: str = "OpenAICompletionsModel",
|
|
211
|
-
additional_headers: Optional[
|
|
213
|
+
additional_headers: Optional[Dict[str, str]] = None,
|
|
212
214
|
api_version: Optional[str] = "2023-03-15-preview",
|
|
213
215
|
token_manager: APITokenManager,
|
|
214
216
|
azureml_model_deployment: Optional[str] = None,
|
|
@@ -220,7 +222,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
220
222
|
frequency_penalty: Optional[float] = 0,
|
|
221
223
|
presence_penalty: Optional[float] = 0,
|
|
222
224
|
stop: Optional[Union[List[str], str]] = None,
|
|
223
|
-
image_captions: Dict[str, str] =
|
|
225
|
+
image_captions: Optional[Dict[str, str]] = None,
|
|
224
226
|
images_dir: Optional[str] = None, # Note: unused, kept for class compatibility
|
|
225
227
|
):
|
|
226
228
|
super().__init__(endpoint_url=endpoint_url, name=name, additional_headers=additional_headers)
|
|
@@ -234,7 +236,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
234
236
|
self.n = n
|
|
235
237
|
self.frequency_penalty = frequency_penalty
|
|
236
238
|
self.presence_penalty = presence_penalty
|
|
237
|
-
self.image_captions = image_captions
|
|
239
|
+
self.image_captions = image_captions or {}
|
|
238
240
|
|
|
239
241
|
# Default stop to end token if not provided
|
|
240
242
|
if not stop:
|
|
@@ -263,7 +265,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
263
265
|
def get_model_params(self):
|
|
264
266
|
return {param: getattr(self, param) for param in self.model_param_names if getattr(self, param) is not None}
|
|
265
267
|
|
|
266
|
-
def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]:
|
|
268
|
+
def format_request_data(self, prompt: Dict[str, str], **request_params) -> Dict[str, str]: # type: ignore[override]
|
|
267
269
|
"""
|
|
268
270
|
Format the request data for the OpenAI API.
|
|
269
271
|
"""
|
|
@@ -274,7 +276,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
274
276
|
async def get_conversation_completion(
|
|
275
277
|
self,
|
|
276
278
|
messages: List[dict],
|
|
277
|
-
session: AsyncHttpPipeline,
|
|
279
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
278
280
|
role: str = "assistant",
|
|
279
281
|
**request_params,
|
|
280
282
|
) -> dict:
|
|
@@ -304,7 +306,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
304
306
|
async def get_all_completions( # type: ignore[override]
|
|
305
307
|
self,
|
|
306
308
|
prompts: List[Dict[str, str]],
|
|
307
|
-
session: AsyncHttpPipeline,
|
|
309
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
308
310
|
api_call_max_parallel_count: int = 1,
|
|
309
311
|
api_call_delay_seconds: float = 0.1,
|
|
310
312
|
request_error_rate_threshold: float = 0.5,
|
|
@@ -328,7 +330,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
328
330
|
# Format prompts and tag with index
|
|
329
331
|
request_datas: List[Dict] = []
|
|
330
332
|
for idx, prompt in enumerate(prompts):
|
|
331
|
-
prompt
|
|
333
|
+
prompt = self.format_request_data(prompt, **request_params)
|
|
332
334
|
prompt[self.prompt_idx_key] = idx # type: ignore[assignment]
|
|
333
335
|
request_datas.append(prompt)
|
|
334
336
|
|
|
@@ -372,7 +374,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
372
374
|
self,
|
|
373
375
|
request_datas: List[dict],
|
|
374
376
|
output_collector: List,
|
|
375
|
-
session: AsyncHttpPipeline,
|
|
377
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
376
378
|
api_call_delay_seconds: float = 0.1,
|
|
377
379
|
request_error_rate_threshold: float = 0.5,
|
|
378
380
|
) -> None:
|
|
@@ -433,7 +435,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
433
435
|
|
|
434
436
|
async def request_api(
|
|
435
437
|
self,
|
|
436
|
-
session: AsyncHttpPipeline,
|
|
438
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
437
439
|
request_data: dict,
|
|
438
440
|
) -> dict:
|
|
439
441
|
"""
|
|
@@ -447,7 +449,7 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
447
449
|
|
|
448
450
|
self._log_request(request_data)
|
|
449
451
|
|
|
450
|
-
token =
|
|
452
|
+
token = self.token_manager.get_token()
|
|
451
453
|
|
|
452
454
|
headers = {
|
|
453
455
|
"Content-Type": "application/json",
|
|
@@ -476,11 +478,12 @@ class OpenAICompletionsModel(LLMBase):
|
|
|
476
478
|
time_start = time.time()
|
|
477
479
|
full_response = None
|
|
478
480
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
481
|
+
if isinstance(session, AIProjectClient):
|
|
482
|
+
response_data = session.red_teams.submit_simulation(request_data, headers, params)
|
|
483
|
+
else:
|
|
484
|
+
response = await session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params)
|
|
485
|
+
response.raise_for_status()
|
|
486
|
+
response_data = response.json()
|
|
484
487
|
|
|
485
488
|
self.logger.info(f"Response: {response_data}")
|
|
486
489
|
|
|
@@ -522,8 +525,8 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
|
|
|
522
525
|
formats the prompt for chat completion.
|
|
523
526
|
"""
|
|
524
527
|
|
|
525
|
-
def __init__(self, name="OpenAIChatCompletionsModel",
|
|
526
|
-
super().__init__(name=name,
|
|
528
|
+
def __init__(self, name="OpenAIChatCompletionsModel", **kwargs):
|
|
529
|
+
super().__init__(name=name, **kwargs)
|
|
527
530
|
|
|
528
531
|
def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override]
|
|
529
532
|
request_data = {"messages": messages, **self.get_model_params()}
|
|
@@ -533,7 +536,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
|
|
|
533
536
|
async def get_conversation_completion(
|
|
534
537
|
self,
|
|
535
538
|
messages: List[dict],
|
|
536
|
-
session: AsyncHttpPipeline,
|
|
539
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
537
540
|
role: str = "assistant",
|
|
538
541
|
**request_params,
|
|
539
542
|
) -> dict:
|
|
@@ -544,7 +547,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
|
|
|
544
547
|
----------
|
|
545
548
|
messages: List of messages to query the model with.
|
|
546
549
|
Expected format: [{"role": "user", "content": "Hello!"}, ...]
|
|
547
|
-
session: AsyncHttpPipeline object to query the model with.
|
|
550
|
+
session: Union[AsyncHttpPipeline, AIProjectClient] object to query the model with.
|
|
548
551
|
role: Not used for this model, since it is a chat model.
|
|
549
552
|
request_params: Additional parameters to pass to the model.
|
|
550
553
|
"""
|
|
@@ -560,7 +563,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
|
|
|
560
563
|
async def get_completion(
|
|
561
564
|
self,
|
|
562
565
|
prompt: str,
|
|
563
|
-
session: AsyncHttpPipeline,
|
|
566
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
564
567
|
**request_params,
|
|
565
568
|
) -> dict:
|
|
566
569
|
"""
|
|
@@ -569,7 +572,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
|
|
|
569
572
|
Parameters
|
|
570
573
|
----------
|
|
571
574
|
prompt: Prompt str to query model with.
|
|
572
|
-
session: AsyncHttpPipeline object to use for the request.
|
|
575
|
+
session: Union[AsyncHttpPipeline, AIProjectClient] object to use for the request.
|
|
573
576
|
**request_params: Additional parameters to pass to the request.
|
|
574
577
|
"""
|
|
575
578
|
messages = [{"role": "system", "content": prompt}]
|
|
@@ -583,7 +586,7 @@ class OpenAIChatCompletionsModel(OpenAICompletionsModel):
|
|
|
583
586
|
async def get_all_completions(
|
|
584
587
|
self,
|
|
585
588
|
prompts: List[str], # type: ignore[override]
|
|
586
|
-
session: AsyncHttpPipeline,
|
|
589
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
587
590
|
api_call_max_parallel_count: int = 1,
|
|
588
591
|
api_call_delay_seconds: float = 0.1,
|
|
589
592
|
request_error_rate_threshold: float = 0.5,
|
|
@@ -3,11 +3,6 @@ name: TaskSimulatorQueryResponse
|
|
|
3
3
|
description: Gets queries and responses from a blob of text
|
|
4
4
|
model:
|
|
5
5
|
api: chat
|
|
6
|
-
configuration:
|
|
7
|
-
type: azure_openai
|
|
8
|
-
azure_deployment: ${env:AZURE_DEPLOYMENT}
|
|
9
|
-
api_key: ${env:AZURE_OPENAI_API_KEY}
|
|
10
|
-
azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
|
|
11
6
|
parameters:
|
|
12
7
|
temperature: 0.0
|
|
13
8
|
top_p: 1.0
|
|
@@ -33,15 +28,16 @@ Answer must not be more than 5 words
|
|
|
33
28
|
Answer must be picked from Text as is
|
|
34
29
|
Question should be as descriptive as possible and must include as much context as possible from Text
|
|
35
30
|
Output must always have the provided number of QnAs
|
|
36
|
-
Output must be in JSON format
|
|
31
|
+
Output must be in JSON format.
|
|
32
|
+
Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
|
|
37
33
|
Text:
|
|
38
34
|
<|text_start|>
|
|
39
35
|
On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
|
|
40
36
|
Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
|
|
41
37
|
<|text_end|>
|
|
42
38
|
Output with 5 QnAs:
|
|
43
|
-
|
|
44
|
-
{
|
|
39
|
+
{
|
|
40
|
+
"qna":[{
|
|
45
41
|
"q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
|
|
46
42
|
"r": "January 24, 1984"
|
|
47
43
|
},
|
|
@@ -60,8 +56,8 @@ Output with 5 QnAs:
|
|
|
60
56
|
{
|
|
61
57
|
"q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
|
|
62
58
|
"r": "6%"
|
|
63
|
-
}
|
|
64
|
-
|
|
59
|
+
}]
|
|
60
|
+
}
|
|
65
61
|
Text:
|
|
66
62
|
<|text_start|>
|
|
67
63
|
{{ text }}
|
|
@@ -3,10 +3,6 @@ name: TaskSimulatorWithPersona
|
|
|
3
3
|
description: Simulates a user to complete a conversation
|
|
4
4
|
model:
|
|
5
5
|
api: chat
|
|
6
|
-
configuration:
|
|
7
|
-
type: azure_openai
|
|
8
|
-
azure_deployment: ${env:AZURE_DEPLOYMENT}
|
|
9
|
-
azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
|
|
10
6
|
parameters:
|
|
11
7
|
temperature: 0.0
|
|
12
8
|
top_p: 1.0
|
|
@@ -20,6 +16,9 @@ inputs:
|
|
|
20
16
|
type: string
|
|
21
17
|
conversation_history:
|
|
22
18
|
type: dict
|
|
19
|
+
action:
|
|
20
|
+
type: string
|
|
21
|
+
default: continue the conversation and make sure the task is completed by asking relevant questions
|
|
23
22
|
|
|
24
23
|
---
|
|
25
24
|
system:
|
|
@@ -29,8 +28,10 @@ Output must be in JSON format
|
|
|
29
28
|
Here's a sample output:
|
|
30
29
|
{
|
|
31
30
|
"content": "Here is my follow-up question.",
|
|
32
|
-
"
|
|
31
|
+
"role": "user"
|
|
33
32
|
}
|
|
34
33
|
|
|
35
34
|
Output with a json object that continues the conversation, given the conversation history:
|
|
36
35
|
{{ conversation_history }}
|
|
36
|
+
|
|
37
|
+
{{ action }}
|