azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (277):
  1. azure/ai/evaluation/__init__.py +83 -14
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/_common/constants.py +124 -2
  16. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  17. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  18. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  19. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  20. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  21. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  22. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  23. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  25. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  26. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  27. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  28. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  29. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  30. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  31. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  32. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  33. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  35. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  38. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  39. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  40. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  41. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  42. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  43. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  44. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  61. azure/ai/evaluation/_common/rai_service.py +578 -69
  62. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  63. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  64. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  65. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  66. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  67. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  68. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  69. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  70. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  71. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  73. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  74. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  76. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  77. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  78. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  79. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  80. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  81. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  82. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  83. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  84. azure/ai/evaluation/_common/utils.py +505 -27
  85. azure/ai/evaluation/_constants.py +148 -0
  86. azure/ai/evaluation/_converters/__init__.py +3 -0
  87. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  88. azure/ai/evaluation/_converters/_models.py +467 -0
  89. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  90. azure/ai/evaluation/_eval_mapping.py +83 -0
  91. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
  92. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  95. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
  96. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  97. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  98. azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
  99. azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
  100. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  101. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
  102. azure/ai/evaluation/_evaluate/_utils.py +237 -42
  103. azure/ai/evaluation/_evaluator_definition.py +76 -0
  104. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
  105. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  106. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  107. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
  108. azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
  109. azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
  110. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  111. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
  112. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
  113. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  114. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
  115. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
  116. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
  117. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
  118. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
  119. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  120. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  121. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  122. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
  123. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
  124. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
  125. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
  126. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
  127. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
  128. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  129. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  130. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  131. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
  132. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
  133. azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
  134. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
  135. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
  136. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  137. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  138. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  139. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
  140. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
  141. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
  142. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
  143. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  144. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  145. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  146. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  147. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  148. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  149. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  150. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  151. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  152. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  153. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  154. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  155. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  156. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  157. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  158. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  159. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  160. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  162. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  163. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  164. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  165. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  166. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  167. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  168. azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
  169. azure/ai/evaluation/_exceptions.py +24 -1
  170. azure/ai/evaluation/_http_utils.py +7 -5
  171. azure/ai/evaluation/_legacy/__init__.py +3 -0
  172. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  173. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  174. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  175. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  176. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  177. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  178. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  179. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  180. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  181. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  182. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  183. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  184. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  185. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  186. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  187. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  188. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  189. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  190. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  191. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  192. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  197. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  198. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  199. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  200. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  201. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  202. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  203. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  204. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  205. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  206. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  207. azure/ai/evaluation/_model_configurations.py +26 -0
  208. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  209. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  210. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  211. azure/ai/evaluation/_user_agent.py +32 -1
  212. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  213. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  214. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  215. azure/ai/evaluation/_version.py +2 -1
  216. azure/ai/evaluation/red_team/__init__.py +22 -0
  217. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  218. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  219. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  220. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  221. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  222. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  223. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  224. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  225. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  226. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  227. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  228. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  229. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  230. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  231. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  232. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  233. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  234. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  235. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  236. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  237. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  238. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  239. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  240. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  241. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  242. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  243. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  244. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  245. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  246. azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
  247. azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
  248. azure/ai/evaluation/simulator/_constants.py +1 -0
  249. azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
  250. azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
  251. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  252. azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
  253. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  254. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
  255. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  256. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  257. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
  258. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
  259. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
  260. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
  261. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  262. azure/ai/evaluation/simulator/_simulator.py +43 -19
  263. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
  264. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  265. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  266. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  267. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  268. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  269. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  270. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  271. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  272. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  273. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  274. azure/ai/evaluation/simulator/_tracing.py +0 -89
  275. azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
  276. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
  277. {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,430 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ """
5
+ MLflow integration module for Red Team Agent.
6
+
7
+ This module handles MLflow run creation, logging, and tracking for red team evaluations.
8
+ """
9
+
10
+ import json
11
+ import os
12
+ import tempfile
13
+ import uuid
14
+ from datetime import datetime
15
+ from typing import Any, Dict, List, Optional, Set, cast
16
+ from pathlib import Path
17
+
18
+ # Azure AI Evaluation imports
19
+ from azure.ai.evaluation._evaluate._eval_run import EvalRun
20
+ from azure.ai.evaluation._evaluate._utils import _trace_destination_from_project_scope, _get_ai_studio_url
21
+ from azure.ai.evaluation._evaluate._utils import extract_workspace_triad_from_trace_provider
22
+ from azure.ai.evaluation._version import VERSION
23
+ from azure.ai.evaluation._azure._clients import LiteMLClient
24
+ from azure.ai.evaluation._constants import EvaluationRunProperties, DefaultOpenEncoding
25
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
26
+ from azure.ai.evaluation._common import RedTeamUpload, ResultType
27
+ from azure.ai.evaluation._model_configurations import AzureAIProject
28
+
29
+ # Local imports
30
+ from ._red_team_result import (
31
+ RedTeamResult,
32
+ RedTeamRun,
33
+ ResultCount,
34
+ PerTestingCriteriaResult,
35
+ DataSource,
36
+ OutputItemsList,
37
+ )
38
+ from ._utils.logging_utils import log_error
39
+
40
+
41
+ class MLflowIntegration:
42
+ """Handles MLflow integration for red team evaluations."""
43
+
44
def __init__(self, logger, azure_ai_project, generated_rai_client, one_dp_project, scan_output_dir=None):
    """Store the collaborators used to create and populate red team runs.

    :param logger: Logger instance used for diagnostic output
    :param azure_ai_project: Azure AI project configuration
    :param generated_rai_client: RAI client used for service interactions
    :param one_dp_project: Whether the target is a OneDP project
    :param scan_output_dir: Optional directory that receives scan output files
    """
    # Identity overrides stay unset until set_run_identity_overrides() is called.
    self._created_at_override: Optional[int] = None
    self._eval_id_override: Optional[str] = None
    self._run_id_override: Optional[str] = None

    # Filled in by start_redteam_mlflow_run() once a run exists.
    self.trace_destination = None
    self.ai_studio_url = None

    # Collaborators and configuration supplied by the caller.
    self.scan_output_dir = scan_output_dir
    self._one_dp_project = one_dp_project
    self.generated_rai_client = generated_rai_client
    self.azure_ai_project = azure_ai_project
    self.logger = logger
63
+
64
def set_run_identity_overrides(
    self,
    *,
    run_id: Optional[str] = None,
    eval_id: Optional[str] = None,
    created_at: Optional[Any] = None,
) -> None:
    """Allow callers to supply pre-existing identifiers for the run payload.

    Every call replaces all three overrides; an argument that is omitted,
    empty, or unusable resets the corresponding override to ``None``.

    :param run_id: Run identifier; stringified and stripped of surrounding whitespace
    :type run_id: Optional[str]
    :param eval_id: Evaluation identifier; stringified and stripped of surrounding whitespace
    :type eval_id: Optional[str]
    :param created_at: Creation time, either a ``datetime`` or any value accepted by ``int()``;
        stored as whole epoch seconds
    :type created_at: Optional[Any]
    """

    def _clean_identifier(value: Optional[Any]) -> Optional[str]:
        # Treat whitespace-only identifiers like missing ones so an empty
        # string is never stored as an override.
        if not value:
            return None
        return str(value).strip() or None

    self._run_id_override = _clean_identifier(run_id)
    self._eval_id_override = _clean_identifier(eval_id)

    if created_at is None or created_at == "":
        self._created_at_override = None
        return

    try:
        if isinstance(created_at, datetime):
            self._created_at_override = int(created_at.timestamp())
        else:
            self._created_at_override = int(created_at)
    except (TypeError, ValueError, OverflowError, OSError):
        # Best effort: an unparseable or out-of-range value (e.g. "abc",
        # float("inf"), a datetime the platform cannot convert) means "no override".
        self._created_at_override = None
86
+
87
def start_redteam_mlflow_run(
    self,
    azure_ai_project: Optional[AzureAIProject] = None,
    run_name: Optional[str] = None,
) -> EvalRun:
    """Create the run that Red Team Agent results are recorded against.

    For OneDP projects the run is created through the OneDP evaluation client and
    the service response is returned as-is. Otherwise an MLFlow-backed ``EvalRun``
    is started against the workspace resolved from the project scope. Both paths
    record the Azure AI Studio URL on ``self.ai_studio_url``.

    :param azure_ai_project: Azure AI project details for logging
    :type azure_ai_project: Optional[AzureAIProject]
    :param run_name: Optional name for the run; a timestamped default is used when omitted
    :type run_name: Optional[str]
    :return: The started run object
    :rtype: EvalRun
    :raises EvaluationException: If no azure_ai_project is provided or trace destination cannot be determined
    """
    if not azure_ai_project:
        log_error(self.logger, "No azure_ai_project provided, cannot upload run")
        raise EvaluationException(
            message="No azure_ai_project provided",
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.MISSING_FIELD,
            target=ErrorTarget.RED_TEAM,
        )

    # OneDP projects: the service creates the run, so return early.
    if self._one_dp_project:
        start_run = self.generated_rai_client._evaluation_onedp_client.start_red_team_run
        upload = RedTeamUpload(
            display_name=run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
        )
        onedp_run = start_run(red_team=upload)

        self.ai_studio_url = onedp_run.properties.get("AiStudioEvaluationUri")
        return onedp_run

    # Workspace-based projects: resolve the workspace, then start an MLFlow run.
    destination = _trace_destination_from_project_scope(azure_ai_project)
    if not destination:
        self.logger.warning("Could not determine trace destination from project scope")
        raise EvaluationException(
            message="Could not determine trace destination",
            blame=ErrorBlame.SYSTEM_ERROR,
            category=ErrorCategory.UNKNOWN,
            target=ErrorTarget.RED_TEAM,
        )

    triad = extract_workspace_triad_from_trace_provider(destination)

    ml_client = LiteMLClient(
        subscription_id=triad.subscription_id,
        resource_group=triad.resource_group_name,
        logger=self.logger,
        credential=azure_ai_project.get("credential"),
    )

    workspace_info = ml_client.workspace_get_info(triad.workspace_name)
    tracking_uri = workspace_info.ml_flow_tracking_uri

    display_name = run_name or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    self.logger.debug(f"Starting MLFlow run with name: {display_name}")
    run = EvalRun(
        run_name=display_name,
        tracking_uri=cast(str, tracking_uri),
        subscription_id=triad.subscription_id,
        group_name=triad.resource_group_name,
        workspace_name=triad.workspace_name,
        management_client=ml_client,
    )
    run._start_run()
    self.logger.debug(f"MLFlow run started successfully with ID: {run.info.run_id}")

    self.trace_destination = destination
    self.logger.debug(f"MLFlow run created successfully with ID: {run}")

    self.ai_studio_url = _get_ai_studio_url(
        trace_destination=self.trace_destination,
        evaluation_id=run.info.run_id,
    )

    return run
165
+
166
+ async def log_redteam_results_to_mlflow(
167
+ self,
168
+ redteam_result: RedTeamResult,
169
+ eval_run: EvalRun,
170
+ red_team_info: Dict,
171
+ _skip_evals: bool = False,
172
+ aoai_summary: Optional["RedTeamRun"] = None,
173
+ ) -> Optional[str]:
174
+ """Log the Red Team Agent results to MLFlow.
175
+
176
+ :param redteam_result: The output from the red team agent evaluation
177
+ :type redteam_result: RedTeamResult
178
+ :param eval_run: The MLFlow run object
179
+ :type eval_run: EvalRun
180
+ :param red_team_info: Red team tracking information
181
+ :type red_team_info: Dict
182
+ :param _skip_evals: Whether to log only data without evaluation results
183
+ :type _skip_evals: bool
184
+ :param aoai_summary: Pre-built AOAI-compatible summary (optional, will be built if not provided)
185
+ :type aoai_summary: Optional[RedTeamRun]
186
+ :return: The URL to the run in Azure AI Studio, if available
187
+ :rtype: Optional[str]
188
+ """
189
+ self.logger.debug(f"Logging results to MLFlow, _skip_evals={_skip_evals}")
190
+ artifact_name = "instance_results.json"
191
+ results_name = "results.json"
192
+ eval_info_name = "redteam_info.json"
193
+ properties = {}
194
+
195
+ with tempfile.TemporaryDirectory() as tmpdir:
196
+ if self.scan_output_dir:
197
+ # Save new format as results.json
198
+ results_path = os.path.join(self.scan_output_dir, results_name)
199
+ self.logger.debug(f"Saving results to scan output directory: {results_path}")
200
+ with open(results_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
201
+ # Use provided aoai_summary
202
+ if aoai_summary is None:
203
+ self.logger.error("aoai_summary must be provided to log_redteam_results_to_mlflow")
204
+ raise ValueError("aoai_summary parameter is required but was not provided")
205
+
206
+ payload = dict(aoai_summary) # Make a copy
207
+ json.dump(payload, f)
208
+
209
+ # Save legacy format as instance_results.json
210
+ artifact_path = os.path.join(self.scan_output_dir, artifact_name)
211
+ self.logger.debug(f"Saving artifact to scan output directory: {artifact_path}")
212
+ with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
213
+ legacy_payload = self._build_instance_results_payload(
214
+ redteam_result=redteam_result,
215
+ eval_run=eval_run,
216
+ red_team_info=red_team_info,
217
+ scan_name=getattr(eval_run, "display_name", None),
218
+ )
219
+ json.dump(legacy_payload, f)
220
+
221
+ eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
222
+ self.logger.debug(f"Saving evaluation info to scan output directory: {eval_info_path}")
223
+ with open(eval_info_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
224
+ # Remove evaluation_result from red_team_info before logging
225
+ red_team_info_logged = {}
226
+ for strategy, harms_dict in red_team_info.items():
227
+ red_team_info_logged[strategy] = {}
228
+ for harm, info_dict in harms_dict.items():
229
+ # Create a copy to avoid modifying the original
230
+ info_dict_copy = info_dict.copy()
231
+ info_dict_copy.pop("evaluation_result", None)
232
+ red_team_info_logged[strategy][harm] = info_dict_copy
233
+ f.write(json.dumps(red_team_info_logged, indent=2))
234
+ self.logger.debug(f"Successfully wrote redteam_info.json to: {eval_info_path}")
235
+
236
+ # Also save a human-readable scorecard if available
237
+ if not _skip_evals and redteam_result.scan_result:
238
+ from ._utils.formatting_utils import format_scorecard
239
+
240
+ scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
241
+ with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
242
+ f.write(format_scorecard(redteam_result.scan_result))
243
+ self.logger.debug(f"Saved scorecard to: {scorecard_path}")
244
+
245
+ # Create a dedicated artifacts directory with proper structure for MLFlow
246
+ # First, create the main artifact file that MLFlow expects (new format)
247
+ with open(
248
+ os.path.join(tmpdir, results_name),
249
+ "w",
250
+ encoding=DefaultOpenEncoding.WRITE,
251
+ ) as f:
252
+ # Use provided aoai_summary (required)
253
+ if aoai_summary is None:
254
+ self.logger.error("aoai_summary must be provided to log_redteam_results_to_mlflow")
255
+ raise ValueError("aoai_summary parameter is required but was not provided")
256
+
257
+ payload = dict(aoai_summary) # Make a copy
258
+ # Remove conversations for MLFlow artifact
259
+ payload.pop("conversations", None)
260
+ json.dump(payload, f)
261
+
262
+ # Also create legacy instance_results.json for compatibility
263
+ with open(
264
+ os.path.join(tmpdir, artifact_name),
265
+ "w",
266
+ encoding=DefaultOpenEncoding.WRITE,
267
+ ) as f:
268
+ legacy_payload = self._build_instance_results_payload(
269
+ redteam_result=redteam_result,
270
+ eval_run=eval_run,
271
+ red_team_info=red_team_info,
272
+ scan_name=getattr(eval_run, "display_name", None),
273
+ )
274
+ json.dump(legacy_payload, f)
275
+
276
+ # Copy all relevant files to the temp directory
277
+ import shutil
278
+
279
+ for file in os.listdir(self.scan_output_dir):
280
+ file_path = os.path.join(self.scan_output_dir, file)
281
+
282
+ # Skip directories and log files if not in debug mode
283
+ if os.path.isdir(file_path):
284
+ continue
285
+ if file.endswith(".log") and not os.environ.get("DEBUG"):
286
+ continue
287
+ if file.endswith(".gitignore"):
288
+ continue
289
+ if file == artifact_name:
290
+ continue
291
+
292
+ try:
293
+ shutil.copy(file_path, os.path.join(tmpdir, file))
294
+ self.logger.debug(f"Copied file to artifact directory: {file}")
295
+ except Exception as e:
296
+ self.logger.warning(f"Failed to copy file {file} to artifact directory: {str(e)}")
297
+
298
+ properties.update({"scan_output_dir": str(self.scan_output_dir)})
299
+ else:
300
+ # Use temporary directory as before if no scan output directory exists
301
+ results_file = Path(tmpdir) / results_name
302
+ with open(results_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
303
+ # Use provided aoai_summary (required)
304
+ if aoai_summary is None:
305
+ self.logger.error("aoai_summary must be provided to log_redteam_results_to_mlflow")
306
+ raise ValueError("aoai_summary parameter is required but was not provided")
307
+
308
+ payload = dict(aoai_summary) # Make a copy
309
+ # Include conversations only if _skip_evals is True
310
+ if _skip_evals and "conversations" not in payload:
311
+ payload["conversations"] = (
312
+ redteam_result.attack_details or redteam_result.scan_result.get("attack_details") or []
313
+ )
314
+ elif not _skip_evals:
315
+ payload.pop("conversations", None)
316
+ json.dump(payload, f)
317
+ self.logger.debug(f"Logged artifact: {results_name}")
318
+
319
+ # Also create legacy instance_results.json
320
+ artifact_file = Path(tmpdir) / artifact_name
321
+ with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
322
+ legacy_payload = self._build_instance_results_payload(
323
+ redteam_result=redteam_result,
324
+ eval_run=eval_run,
325
+ red_team_info=red_team_info,
326
+ scan_name=getattr(eval_run, "display_name", None),
327
+ )
328
+ json.dump(legacy_payload, f)
329
+ self.logger.debug(f"Logged artifact: {artifact_name}")
330
+
331
+ properties.update(
332
+ {
333
+ "redteaming": "asr",
334
+ EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
335
+ }
336
+ )
337
+
338
+ metrics = {}
339
+ if redteam_result.scan_result:
340
+ scorecard = redteam_result.scan_result["scorecard"]
341
+ joint_attack_summary = scorecard["joint_risk_attack_summary"]
342
+
343
+ if joint_attack_summary:
344
+ for risk_category_summary in joint_attack_summary:
345
+ risk_category = risk_category_summary.get("risk_category").lower()
346
+ for key, value in risk_category_summary.items():
347
+ if key != "risk_category":
348
+ metrics.update({f"{risk_category}_{key}": cast(float, value)})
349
+ self.logger.debug(f"Logged metric: {risk_category}_{key} = {value}")
350
+
351
+ if self._one_dp_project:
352
+ try:
353
+ create_evaluation_result_response = (
354
+ self.generated_rai_client._evaluation_onedp_client.create_evaluation_result(
355
+ name=str(uuid.uuid4()),
356
+ path=tmpdir,
357
+ metrics=metrics,
358
+ result_type=ResultType.REDTEAM,
359
+ )
360
+ )
361
+
362
+ update_run_response = self.generated_rai_client._evaluation_onedp_client.update_red_team_run(
363
+ name=eval_run.id,
364
+ red_team=RedTeamUpload(
365
+ id=eval_run.id,
366
+ display_name=eval_run.display_name
367
+ or f"redteam-agent-{datetime.now().strftime('%Y%m%d-%H%M%S')}",
368
+ status="Completed",
369
+ outputs={
370
+ "evaluationResultId": create_evaluation_result_response.id,
371
+ },
372
+ properties=properties,
373
+ ),
374
+ )
375
+ self.logger.debug(f"Updated UploadRun: {update_run_response.id}")
376
+ except Exception as e:
377
+ self.logger.warning(f"Failed to upload red team results to AI Foundry: {str(e)}")
378
+ else:
379
+ # Log the entire directory to MLFlow
380
+ try:
381
+ eval_run.log_artifact(tmpdir, artifact_name)
382
+ if self.scan_output_dir:
383
+ eval_run.log_artifact(tmpdir, eval_info_name)
384
+ self.logger.debug(f"Successfully logged artifacts directory to AI Foundry")
385
+ except Exception as e:
386
+ self.logger.warning(f"Failed to log artifacts to AI Foundry: {str(e)}")
387
+
388
+ for k, v in metrics.items():
389
+ eval_run.log_metric(k, v)
390
+ self.logger.debug(f"Logged metric: {k} = {v}")
391
+
392
+ eval_run.write_properties_to_run_history(properties)
393
+ eval_run._end_run("FINISHED")
394
+
395
+ self.logger.info("Successfully logged results to AI Foundry")
396
+ return None
397
+
398
def _build_instance_results_payload(
    self,
    redteam_result: RedTeamResult,
    eval_run: Optional[Any] = None,
    red_team_info: Optional[Dict] = None,
    scan_name: Optional[str] = None,
) -> Dict:
    """Build the legacy ``instance_results.json`` payload from a scan result.

    The payload is a shallow copy of ``redteam_result.scan_result`` with the
    AOAI-compatible entries removed (those belong in results.json only), and
    with the ``scorecard``, ``parameters`` and ``attack_details`` keys
    guaranteed to be present.

    :param redteam_result: Result object; its ``scan_result`` and
        ``attack_details`` attributes are read.
    :param eval_run: Accepted for interface compatibility; not read here.
    :param red_team_info: Accepted for interface compatibility; not read here.
    :param scan_name: Accepted for interface compatibility; not read here.
    :return: A new dictionary in the legacy scan_result layout.
    """
    aoai_only_keys = ("AOAI_Compatible_Summary", "AOAI_Compatible_Row_Results")

    # A missing or empty scan_result is treated as an empty mapping.
    raw_scan = cast(Dict[str, Any], redteam_result.scan_result or {})

    # Copy entries in their original order, leaving out the AOAI-only ones.
    payload: Dict[str, Any] = {}
    for name, value in raw_scan.items():
        if name in aoai_only_keys:
            continue
        payload[name] = value

    # Make sure the basic required fields exist.
    for required in ("scorecard", "parameters"):
        payload.setdefault(required, {})

    # Only consult redteam_result.attack_details when the scan result did not
    # already provide the key.
    if "attack_details" not in payload:
        payload["attack_details"] = redteam_result.attack_details or []

    return payload